From 395866c98c64e3f6f7b7bea907923edd1ad158bf Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Thu, 17 Mar 2022 12:17:36 +0000 Subject: [PATCH 01/28] feat(cfk): deployment and grafana dashboards --- .../cfk/confluent-platform.yaml | 730 ++ .../grafana/confluent-platform.json | 2083 ++++++ .../grafana/kafka-cluster.json | 6048 +++++++++++++++++ .../grafana/kafka-connect-cluster.json | 5618 +++++++++++++++ .../grafana/kafka-topics.json | 1061 +++ .../grafana/ksqldb-cluster.json | 3586 ++++++++++ .../grafana/schema-registry-cluster.json | 994 +++ .../grafana/zookeeper-cluster.json | 1048 +++ 8 files changed, 21168 insertions(+) create mode 100644 cfk-prometheus-grafana/cfk/confluent-platform.yaml create mode 100644 cfk-prometheus-grafana/grafana/confluent-platform.json create mode 100644 cfk-prometheus-grafana/grafana/kafka-cluster.json create mode 100644 cfk-prometheus-grafana/grafana/kafka-connect-cluster.json create mode 100644 cfk-prometheus-grafana/grafana/kafka-topics.json create mode 100644 cfk-prometheus-grafana/grafana/ksqldb-cluster.json create mode 100644 cfk-prometheus-grafana/grafana/schema-registry-cluster.json create mode 100644 cfk-prometheus-grafana/grafana/zookeeper-cluster.json diff --git a/cfk-prometheus-grafana/cfk/confluent-platform.yaml b/cfk-prometheus-grafana/cfk/confluent-platform.yaml new file mode 100644 index 00000000..07ab20c7 --- /dev/null +++ b/cfk-prometheus-grafana/cfk/confluent-platform.yaml @@ -0,0 +1,730 @@ +--- +apiVersion: platform.confluent.io/v1beta1 +kind: Zookeeper +metadata: + name: zookeeper + namespace: confluent +spec: + replicas: 3 + image: + application: confluentinc/cp-zookeeper:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dataVolumeCapacity: 10Gi + logVolumeCapacity: 10Gi + metrics: + prometheus: + rules: + - labels: + client_address: $4 + connection_id: $5 + member_type: $3 + server_id: $2 + server_name: $1 + name: zookeeper_connections_$6 + pattern: 
org.apache.ZooKeeperService<>([^:]+) + valueFactor: 1 + - labels: + member_type: $3 + server_id: $2 + server_name: $1 + name: zookeeper_$4 + pattern: 'org.apache.ZooKeeperService<>(\w+): + (\d+)' + valueFactor: 1 + - labels: + member_type: $3 + server_id: $2 + server_name: $1 + name: zookeeper_inmemorydatatree_$4 + pattern: 'org.apache.ZooKeeperService<>(WatchCount|NodeCount): (\d+)' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + server_id: $2 + server_name: $1 + name: zookeeper_status + pattern: 'org.apache.ZooKeeperService<>(.+): + (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + server_id: $1 + name: zookeeper_status_$2 + pattern: 'org.apache.ZooKeeperService<>(QuorumSize): + (\d+)' + type: GAUGE + valueFactor: 1 + - labels: + server_id: "1" + server_name: $1 + name: zookeeper_inmemorydatatree_$2 + pattern: 'org.apache.ZooKeeperService<>(WatchCount|NodeCount): + (\d+)' + type: GAUGE + valueFactor: 1 + - labels: + client_address: $2 + connection_id: $3 + server_name: $1 + name: zookeeper_connections_$4 + pattern: org.apache.ZooKeeperService<>([^:]+) + valueFactor: 1 + - labels: + $2: $3 + server_name: $1 + name: zookeeper_$2 + pattern: 'org.apache.ZooKeeperService<>(StartTime|ClientPort|SecureClientAddress|Version|SecureClientPort): + (.+)' + value: "1" + valueFactor: 1 + - name: zookeeper_$2 + pattern: 'org.apache.ZooKeeperService<>(.+): (.+)' + type: GAUGE + valueFactor: 1 + whitelist: + - org.apache.ZooKeeperService:name3=Connections,* + - org.apache.ZooKeeperService:name3=InMemoryDataTree,* + - org.apache.ZooKeeperService:name0=*,name1=replica*,name2=* + - org.apache.ZooKeeperService:name0=*,name1=replica* + - org.apache.ZooKeeperService:name0=* + - org.apache.ZooKeeperService:name1=InMemoryDataTree,name0=* + - org.apache.ZooKeeperService:name0=*,name1=Connections,name2=*,name3=* + +--- +apiVersion: platform.confluent.io/v1beta1 +kind: Kafka +metadata: + name: kafka + namespace: confluent +spec: + replicas: 3 + image: + application: 
confluentinc/cp-server:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dataVolumeCapacity: 10Gi + metricReporter: + enabled: true + metrics: + prometheus: + blacklist: + - kafka.consumer:type=*,id=* + - kafka.consumer:type=*,client-id=* + - kafka.consumer:type=*,client-id=*,node-id=* + - kafka.producer:type=*,id=* + - kafka.producer:type=*,client-id=* + - kafka.producer:type=*,client-id=*,node-id=* + - kafka.*:type=kafka-metrics-count,* + - kafka.admin.client:* + - kafka.server:type=*,cipher=*,protocol=*,listener=*,networkProcessor=* + - kafka.server:type=* + rules: + - labels: + partition: $5 + topic: $4 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>Value + type: GAUGE + valueFactor: "1" + - labels: + clientId: $3 + partition: $5 + topic: $4 + name: kafka_server_$1_$2 + pattern: kafka.server<>Value + type: GAUGE + valueFactor: "1" + - labels: + broker: $4:$5 + clientId: $3 + name: kafka_server_$1_$2 + pattern: kafka.server<>Value + type: GAUGE + valueFactor: "1" + - labels: + $4: $5 + $6: $7 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(Count|Value) + valueFactor: "1" + - labels: + $4: $5 + $6: $7 + quantile: 0.$8 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(\d+)thPercentile + type: GAUGE + valueFactor: "1" + - labels: + $3: $4 + $5: $6 + $7: $8 + name: kafka_$1_$2 + pattern: kafka.(\w+)<>Value + valueFactor: "1" + - labels: + $4: $5 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(Count|Value) + valueFactor: "1" + - labels: + $3: $4 + $5: $6 + name: kafka_$1_$2 + pattern: kafka.(\w+)<>(Count|Value) + valueFactor: "1" + - name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(Count|Value) + valueFactor: "1" + - labels: + $3: $4 + name: kafka_$1_$2 + pattern: kafka.(\w+)<>Value + valueFactor: "1" + - name: kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total + pattern: kafka.server<>OneMinuteRate + type: GAUGE + valueFactor: "1" + - labels: + client_software_name: $1 + client_software_version: $2 + listener: $3 + network_processor: $4 + 
name: kafka_server_socketservermetrics_connections + pattern: kafka.server<>connections + type: GAUGE + valueFactor: "1" + - labels: + listener: $1 + network_processor: $2 + name: kafka_server_socketservermetrics_$3 + pattern: 'kafka.server<>(.+):' + type: GAUGE + valueFactor: "1" + - name: kafka_coordinator_$1_$2_$3 + pattern: kafka.coordinator.(\w+)<>(Count|Value) + valueFactor: "1" + - labels: + $4: $5 + quantile: 0.$6 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(\d+)thPercentile + type: GAUGE + valueFactor: "1" + - labels: + quantile: 0.$4 + name: kafka_$1_$2_$3 + pattern: kafka.(\w+)<>(\d+)thPercentile + type: GAUGE + valueFactor: "1" + - labels: + $3: $4 + $5: $6 + $7: $8 + name: confluent_$1_$2 + pattern: confluent.(\w+)<>Value + type: GAUGE + valueFactor: "1" + - labels: + $3: $4 + $5: $6 + name: confluent_$1_$2 + pattern: confluent.(.+)<>Value + type: GAUGE + valueFactor: "1" + - labels: + $3: $4 + name: confluent_$1_$2 + pattern: confluent.(.+)<>Value + type: GAUGE + valueFactor: "1" + - labels: + client-id: $3 + user: $2 + name: kafka_server_$1_$4 + pattern: 'kafka.server<>(.+):' + type: GAUGE + valueFactor: "1" + - labels: + user: $2 + name: kafka_server_$1_$3 + pattern: 'kafka.server<>(.+):' + type: GAUGE + valueFactor: "1" + - labels: + client-id: $2 + name: kafka_server_$1_$3 + pattern: 'kafka.server<>(.+):' + type: GAUGE + valueFactor: "1" +--- +apiVersion: platform.confluent.io/v1beta1 +kind: SchemaRegistry +metadata: + name: schemaregistry + namespace: confluent +spec: + replicas: 3 + image: + application: confluentinc/cp-schema-registry:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + metrics: + prometheus: + blacklist: + - kafka.producer:type=app-info,client-id=* + - kafka.consumer:type=app-info,client-id=* + - kafka.admin.client:* + - kafka.consumer:type=*,id=* + - kafka.producer:type=*,id=* + - kafka.*:type=kafka-metrics-count,* + rules: + - name: kafka_schema_registry_jetty_metrics_$1 + pattern: 'kafka.schema.registry([^:]+):' 
+ valueFactor: 1 + - name: kafka_schema_registry_jersey_metrics_$1 + pattern: 'kafka.schema.registry([^:]+):' + valueFactor: 1 + - labels: + $2: $3 + client-id: $1 + name: kafka_schema_registry_app_info + pattern: 'kafka.schema.registry<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - name: kafka_schema_registry_registered_count + pattern: 'kafka.schema.registry([^:]+):' + valueFactor: 1 + - labels: + schema_type: $1 + name: kafka_schema_registry_schemas_$2 + pattern: 'kafka.schema.registry([^:]+):' + valueFactor: 1 + - labels: + client_id: $2 + name: kafka_schema_registry_$1_$3 + pattern: 'kafka.schema.registry<>([^:]+):' + valueFactor: 1 + - labels: + $3: $4 + client_id: $2 + client_type: $1 + name: kafka_$1_app_info + pattern: 'kafka.(.+)<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + $7: $8 + client_type: $1 + name: kafka_$1_$2_$9 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + client_type: $1 + name: kafka_$1_$2_$7 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_type: $1 + name: kafka_$1_$2_$5 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + client_type: $1 + name: kafka_$1_$2_$3 + pattern: 'kafka.(.+)<>(.+):' + valueFactor: 1 + whitelist: + - kafka.schema.registry:type=jetty-metrics + - kafka.schema.registry:type=jersey-metrics + - kafka.schema.registry:type=app-info,id=* + - kafka.schema.registry:type=registered-count + - kafka.schema.registry:type=json-schema* + - kafka.schema.registry:type=protobuf-schemas* + - kafka.schema.registry:type=avro-schemas* + - kafka.schema.registry:type=kafka.schema.registry-metrics,client-id=* + - kafka.schema.registry:type=kafka.schema.registry-coordinator-metrics,client-id=* + - kafka.consumer:* + - kafka.producer:* + +--- +apiVersion: platform.confluent.io/v1beta1 +kind: Connect +metadata: + name: connect + namespace: confluent +spec: + replicas: 1 + 
image: + application: confluentinc/cp-server-connect:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dependencies: + kafka: + bootstrapEndpoint: kafka:9071 + metrics: + prometheus: + blacklist: + - kafka.admin.client:* + - kafka.consumer:type=*,id=* + - kafka.producer:type=*,id=* + - kafka.producer:client-id=confluent.monitoring*,* + - kafka.*:type=kafka-metrics-count,* + rules: + - labels: + $2: $3 + client-id: $1 + name: kafka_connect_app_info + pattern: 'kafka.connect<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - name: kafka_connect_connect_worker_rebalance_metrics_$1 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + client_id: $2 + name: kafka_connect_$1_$3 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + connector: aggregate + name: kafka_connect_connect_worker_metrics_$1 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + connector: $1 + name: kafka_connect_connect_worker_metrics_$2 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + $2: $3 + connector: $1 + name: kafka_connect_connector_metrics + pattern: 'kafka.connect<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + connector: $2 + task: $3 + name: kafka_connect_$1_task_metrics_$4 + pattern: 'kafka.connect<>(.+): + (.+)' + valueFactor: 1 + - labels: + connector: $1 + task: $2 + name: kafka_connect_task_error_metrics_$3 + pattern: kafka.connect<>([^:]+) + valueFactor: 1 + - labels: + $1: $2 + $3: $4 + $5: $6 + $7: $8 + name: confluent_replicator_task_metrics_$9 + pattern: 'confluent.replicator<>confluent-replicator-task-topic-partition-(.*): + (.*)' + valueFactor: 1 + - labels: + $1: $2 + $3: $4 + $5: $6 + $7: $8 + $9: $10 + name: confluent_replicator_task_metrics_info + pattern: 'confluent.replicator<>(confluent-replicator-destination-cluster|confluent-replicator-source-cluster|confluent-replicator-destination-topic-name): + (.*)' + value: "1" + valueFactor: 1 + - labels: + $3: $4 + client_id: $2 
+ client_type: $1 + name: kafka_$1_app_info + pattern: 'kafka.(.+)<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + $7: $8 + client_type: $1 + name: kafka_$1_$2_$9 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + client_type: $1 + name: kafka_$1_$2_$7 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_type: $1 + name: kafka_$1_$2_$5 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + client_type: $1 + name: kafka_$1_$2_$3 + pattern: 'kafka.(.+)<>(.+):' + valueFactor: 1 + whitelist: + - kafka.connect:type=app-info,client-id=* + - kafka.connect:type=connect-worker-rebalance-metrics + - kafka.connect:type=connect-coordinator-metrics,* + - kafka.connect:type=connect-metrics,* + - kafka.connect:type=connect-worker-metrics + - kafka.connect:type=connect-worker-metrics,* + - kafka.connect:type=connector-metrics,* + - kafka.connect:type=*-task-metrics,* + - kafka.connect:type=task-error-metrics,* + - confluent.replicator:type=confluent-replicator-task-metrics,* + - kafka.consumer:* + - kafka.producer:* + +--- +apiVersion: platform.confluent.io/v1beta1 +kind: KsqlDB +metadata: + name: ksqldb + namespace: confluent +spec: + replicas: 1 + image: + application: confluentinc/cp-ksqldb-server:7.0.1 + init: confluentinc/confluent-init-container:2.2.0-1 + dataVolumeCapacity: 10Gi + metrics: + prometheus: + blacklist: + - kafka.streams:type=kafka-metrics-count + - kafka.admin.client:* + - kafka.consumer:type=*,id=* + - kafka.consumer:type=*,client-id=* + - kafka.consumer:type=*,client-id=*,node-id=* + - kafka.producer:type=*,id=* + - kafka.producer:type=*,client-id=* + - kafka.producer:type=*,client-id=*,node-id=* + - kafka.streams:type=stream-processor-node-metrics,thread-id=*,task-id=*,processor-node-id=* + - kafka.*:type=kafka-metrics-count,* + rules: + - labels: + id: $3 + key: $2 + name: ksql_$1_$4 + pattern: 
io.confluent.ksql.metrics<>([^:]+) + valueFactor: 1 + - labels: + ksql_cluster: $1 + name: ksql_ksql_engine_query_stats_$2 + pattern: io.confluent.ksql.metrics<>([^:]+) + valueFactor: 1 + - labels: + $4: $5 + ksql_cluster: $2 + ksql_query: $3 + name: ksql_ksql_metrics_$1_$4 + pattern: 'io.confluent.ksql.metrics<>(.+): + (.+)' + value: "1" + valueFactor: 1 + - labels: + $4: $5 + task_id: $3 + thread_id: $2 + name: kafka_streams_$1_$6 + pattern: 'kafka.streams<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + task_id: $3 + thread_id: $2 + name: kafka_streams_$1_$4 + pattern: 'kafka.streams<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $1: $2 + $3: $4 + name: kafka_streams_stream_metrics + pattern: 'kafka.streams<>(state|alive-stream-threads|commit-id|version|application-id): + (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $2: $3 + name: kafka_streams_$1_$4 + pattern: kafka.streams<>([^:]+) + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_id: $2 + client_type: $1 + name: kafka_$1_app_info + pattern: 'kafka.(.+)<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + $7: $8 + client_type: $1 + name: kafka_$1_$2_$9 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + client_type: $1 + name: kafka_$1_$2_$7 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_type: $1 + name: kafka_$1_$2_$5 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + client_type: $1 + name: kafka_$1_$2_$3 + pattern: 'kafka.(.+)<>(.+):' + valueFactor: 1 + whitelist: + - io.confluent.ksql.metrics:* + - kafka.consumer:* + - kafka.producer:* + - kafka.streams:* + +--- +apiVersion: platform.confluent.io/v1beta1 +kind: ControlCenter +metadata: + name: controlcenter + namespace: confluent +spec: + replicas: 1 + image: + application: confluentinc/cp-enterprise-control-center:7.0.1 + init: 
confluentinc/confluent-init-container:2.2.0-1 + dataVolumeCapacity: 10Gi + dependencies: + schemaRegistry: + url: http://schemaregistry.confluent.svc.cluster.local:8081 + ksqldb: + - name: ksqldb + url: http://ksqldb.confluent.svc.cluster.local:8088 + connect: + - name: connect + url: http://connect.confluent.svc.cluster.local:8083 + metrics: + prometheus: + blacklist: + - kafka.streams:type=kafka-metrics-count + - kafka.admin.client:* + - kafka.consumer:type=*,id=* + - kafka.producer:type=*,id=* + - kafka.*:type=kafka-metrics-count,* + rules: + - labels: + $4: $5 + task_id: $3 + thread_id: $2 + name: kafka_streams_$1_$6 + pattern: 'kafka.streams<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + task_id: $3 + thread_id: $2 + name: kafka_streams_$1_$4 + pattern: 'kafka.streams<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $1: $2 + $3: $4 + name: kafka_streams_stream_metrics + pattern: 'kafka.streams<>(state|alive-stream-threads|commit-id|version|application-id): + (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $2: $3 + name: kafka_streams_$1_$4 + pattern: kafka.streams<>([^:]+) + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_id: $2 + client_type: $1 + name: kafka_$1_app_info + pattern: 'kafka.(.+)<>(.+): (.+)' + type: UNTYPED + value: "1" + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + $7: $8 + client_type: $1 + name: kafka_$1_$2_$9 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + $5: $6 + client_type: $1 + name: kafka_$1_$2_$7 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + $3: $4 + client_type: $1 + name: kafka_$1_$2_$5 + pattern: 'kafka.(.+)<>(.+):' + type: GAUGE + valueFactor: 1 + - labels: + client_type: $1 + name: kafka_$1_$2_$3 + pattern: 'kafka.(.+)<>(.+):' + valueFactor: 1 + whitelist: + - kafka.streams:* + - kafka.consumer:* + - kafka.producer:* diff --git a/cfk-prometheus-grafana/grafana/confluent-platform.json 
b/cfk-prometheus-grafana/grafana/confluent-platform.json new file mode 100644 index 00000000..d574b808 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/confluent-platform.json @@ -0,0 +1,2083 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 11, + "iteration": 1647519271805, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 35, + "panels": [], + "title": "Zookeeper", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Quorum Size of Zookeeper ensemble", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "#299c46", + "value": 3 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 16, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "count(zookeeper_status_quorumsize{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Zookeeper nodes online", + 
"type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 18, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Number of ZNodes", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of Alive Connections", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100 + }, + { + "color": "#d44a3a", + "value": 200 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 20, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" 
+ }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "sum(zookeeper_numaliveconnections{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Alive Connections", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of Watchers", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 500 + }, + { + "color": "#d44a3a", + "value": 1000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 22, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Number of Watchers", + "type": "stat" + }, + { + "description": "Number of Alive Connections", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100 + }, + { + "color": "#d44a3a", + "value": 200 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 24, + 
"links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "exemplar": false, + "expr": "zookeeper_outstandingrequests{namespace=\"$ns\"}", + "instant": true, + "interval": "", + "legendFormat": "{{pod}} ({{member_type}})", + "refId": "A" + } + ], + "title": "Outstanding Requests", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 4, + "panels": [], + "title": "Kafka Cluster", + "type": "row" + }, + { + "description": "Number of active controllers in the cluster.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "#e5ac0e", + "value": 2 + }, + { + "color": "#bf1b00" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 6 + }, + "id": 2, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value_and_name" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "exemplar": false, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$ns\"} > 0", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + 
], + "title": "Active Controllers", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of Brokers Online", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 2 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 6 + }, + "id": 6, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "repeatDirection": "h", + "targets": [ + { + "exemplar": true, + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$ns\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Brokers Online", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Partitions that are online", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 0 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 6 + }, + "id": 8, + 
"links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$ns\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Online Partitions", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#508642", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#bf1b00", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 6 + }, + "id": 10, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Under Replicated Partitions", + "type": "stat" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of partitions under min insync replicas.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#508642", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#bf1b00", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 6 + }, + "id": 12, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Under Min ISR Partitions", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of partitions that don't have an active leader and are hence not writable or readable.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#508642", + "value": null + }, + { + "color": "#ef843c", + "value": 1 + }, + { + "color": "#bf1b00", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 6 + }, + "id": 14, + "links": [], + 
"maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$ns\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Offline Partitions Count", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 26, + "panels": [], + "title": "Schema Registry", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 11 + }, + "id": 30, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "count(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Schema Registry Instances", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + 
"y": 11 + }, + "id": 28, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Schemas registered", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 11 + }, + "id": 33, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Schemas deleted", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 37, + "panels": [], + "repeat": "cluster", + "title": "Kafka Connect ($kafka_connect_app) ", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 16 + }, + "id": 39, + "options": { + "colorMode": 
"value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",app=~\"$kafka_connect_app\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Connect worker instances", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 16 + }, + "id": 48, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=~\"$kafka_connect_app\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Tasks Total", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 16 + }, + "id": 41, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + 
"fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=~\"$kafka_connect_app\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Tasks Running", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 16 + }, + "id": 43, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",instance=~\"$instance\",app=~\"$kafka_connect_app\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Tasks Paused", + "transformations": [], + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 16 + }, + "id": 45, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + 
"lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",instance=~\"$instance\",app=~\"$kafka_connect_app\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Tasks Failed", + "transformations": [], + "type": "stat" + }, + { + "description": "Time since last rebalance", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 16 + }, + "id": 47, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "repeat": "instance", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "exemplar": true, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=~\"$kafka_connect_app\"} >= 0", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "title": "Time since last rebalance ", + "type": "stat" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 52, + "panels": [], + "repeat": "clusterid", + "title": "ksqlDB Cluster ($ksqldb_cluster_id) ", + "type": "row" + }, 
+ { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of ksqlDB server instances reporting metrics.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 21 + }, + "id": 50, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "ksqlDB instances", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Average number of active queries per server.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 21 + }, + "id": 53, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", +
"orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "avg(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Active Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of created queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 500 + }, + { + "color": "#d44a3a", + "value": 800 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 21 + }, + "id": 55, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "avg(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Running Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of rebalancing queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 21 + }, + "id": 57, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Rebalancing Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of error query", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a", + "value": 2 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 21 + }, + "id": 59, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "interval": "", 
+ "legendFormat": "", + "refId": "A" + } + ], + "title": "Queries in Error State", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of not running queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 21 + }, + "id": 61, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "exemplar": true, + "expr": "sum(ksql_ksql_engine_query_stats_not_running_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Stopped Queries", + "type": "stat" + } + ], + "refresh": "1m", + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "confluent", + "value": "confluent" + }, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "connect", + "value": "connect" + }, + "definition": 
"label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\"}, app)", + "hide": 0, + "includeAll": false, + "label": "Connect cluster", + "multi": false, + "name": "kafka_connect_app", + "options": [], + "query": { + "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\"}, app)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "confluent.ksqldb_", + "value": "confluent.ksqldb_" + }, + "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$ns\"},ksql_cluster)", + "hide": 0, + "includeAll": false, + "label": "ksqlDB Cluster ID", + "multi": false, + "name": "ksqldb_cluster_id", + "options": [], + "query": { + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$ns\"},ksql_cluster)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Confluent Platform overview", + "uid": "JiqnBMNnz", + "version": 2, + "weekStart": "" +} \ No newline at end of file diff --git a/cfk-prometheus-grafana/grafana/kafka-cluster.json b/cfk-prometheus-grafana/grafana/kafka-cluster.json new file mode 100644 index 00000000..09734d06 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/kafka-cluster.json @@ -0,0 +1,6048 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Kafka resource usage and throughput", + "editable": true, + "gnetId": 721, + "graphTooltip": 0, + "id": 
6, + "iteration": 1647427255896, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 42, + "panels": [], + "title": "Healthcheck", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of active controllers in the cluster.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "#e5ac0e", + "value": 2 + }, + { + "color": "#bf1b00" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "vertical", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "value_and_name" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$ns\",pod=~\"$pod\"} > 0", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Active Controllers", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of Brokers Online", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 2 + } 
+ ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": "h", + "targets": [ + { + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Brokers Online", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Partitions that are online", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 0 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + 
"refId": "A" + } + ], + "title": "Online Partitions", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "#d44a3a" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Preferred Replica Imbalance", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Bytes/s", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 84, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Bytes in", + "metric": "kafka_server_brokertopicmetrics_bytesinpersec", + "refId": "A", + "step": 4 + }, + { + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Bytes out", + "metric": "kafka_server_brokertopicmetrics_bytesinpersec", + "refId": "B", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Broker network throughput", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#508642", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#bf1b00", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Under Replicated Partitions", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of partitions under min insync replicas.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#508642", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#bf1b00", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 5 + }, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Under Min ISR Partitions", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": 
"Number of partitions that dont have an active leader and are hence not writable or readable.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#508642", + "value": null + }, + { + "color": "#ef843c", + "value": 1 + }, + { + "color": "#bf1b00", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 5 + }, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Offline Partitions Count", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Unclean leader election rate", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "#d44a3a" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 5 + }, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": 
"none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "time_series", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Unclean Leader Election Rate", + "type": "stat" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 31, + "panels": [], + "title": "Request rate", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Produce request rate.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 10 + }, + "id": 93, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\"}[5m]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "All Request Per Sec", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Produce request rate.", + "fieldConfig": { + "defaults": { + "mappings": [ + { 
+ "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 10 + }, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"Produce\"}[5m]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Request Per Sec", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Fetch request rate.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 10 + }, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"FetchConsumer\"}[5m]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } 
+ ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer Fetch Request Per Sec", + "type": "stat" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 10 + }, + "id": 122, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max", + "mean" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "exemplar": true, + "expr": "rate(kafka_network_requestmetrics_errorspersec{error!=\"NONE\"}[5m])", + "interval": "", + "legendFormat": "{{error}} @ {{hostname}}", + "refId": "A" + } + ], + "title": "Errors", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Fetch request rate.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 14 + }, + "id": 94, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + 
"graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"Fetch\"}[5m]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Broker Fetch Request Per Sec", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Offset Commit request rate.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 14 + }, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"OffsetCommit\"}[5m]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset Commit Request Per Sec", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Metadata request rate.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 14 + }, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"Metadata\"}[5m]))", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Metadata Request Per Sec", + "type": "stat" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 40, + "panels": [], + "title": "System", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "localhost:7071" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#629E51", + 
"mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 19 + }, + "id": 27, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",pod=~\"$pod\"}[5m])*100", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "process_cpu_secondspersec", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Memory", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "localhost:7071" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#BA43A9", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 19 + }, + "id": 2, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" 
+ } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",pod=~\"$pod\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "jvm_memory_bytes_used", + "refId": "A", + "step": 4 + }, + { + "expr": "jvm_memory_bytes_max{area=\"heap\",namespace=\"$ns\",pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "JVM Memory Used", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "% time in GC", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "localhost:7071" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#890F02", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 19 + }, + "id": 3, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum 
without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",pod=~\"$pod\"}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "jvm_gc_collection_seconds_sum", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Time spent in GC", + "type": "timeseries" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 29, + "panels": [ + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Messages/s", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 24 + }, + "id": 4, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$ns\",topic!=\"\"}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "messages/sec", + "metric": "kafka_server_brokertopicmetrics_messagesinpersec", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In", +
"type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Bytes/s", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 24 + }, + "id": 5, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",topic!=\"\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "bytes/sec", + "metric": "kafka_server_brokertopicmetrics_bytesinpersec", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Bytes/s", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": 
"linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 24 + }, + "id": 6, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",topic!=\"\"}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "bytes/sec", + "metric": "kafka_server_brokertopicmetrics_bytesinpersec", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Messages/s", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] 
+ }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 10, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "kafka_server_brokertopicmetrics_messagesinpersec", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In Per Broker", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Bytes/s", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "Bps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 7, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum 
without(topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "metric": "kafka_server_brokertopicmetrics_bytesinpersec", + "refId": "A", + "step": 4 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In Per Broker", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 32 + }, + "id": 9, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out Per Broker", + "type": "timeseries" + } + ], + "title": "Throughput In/Out", + "type": "row" 
+ }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 44, + "panels": [ + { + "datasource": "Prometheus", + "description": "Average fraction of time the network processor threads are idle. Values are between 0 (all resources are used) and 100 (all resources are available)\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$ns\",pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network Processor Avg Usage Percent", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Average fraction of time the request handler threads are idle. 
Values are between 0 (all resources are used) and 100 (all resources are available).\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 25 + }, + "id": 25, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "1 - kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$ns\",pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Handler Avg Percent", + "type": "timeseries" + } + ], + "title": "Thread utilization", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 86, + "panels": [ + { + "datasource": "Prometheus", + "description": "Latency in milliseconds for ZooKeeper requests from the broker.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle":
"line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 26 + }, + "id": 88, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper Request Latency", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": 
"green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 26 + }, + "id": 92, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace=\"$ns\",pod=~\"$pod\"}", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper connections per sec", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 35 + }, + "id": 89, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": 
"kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace=\"$ns\",pod=~\"$pod\"}", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper expired connections per sec", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 35 + }, + "id": 90, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace=\"$ns\",pod=~\"$pod\"}", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper disconnect per sec", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 
0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 35 + }, + "id": 91, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace=\"$ns\",pod=~\"$pod\"}", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper auth failures per sec", + "type": "timeseries" + } + ], + "title": "Zookeeper", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 82, + "panels": [ + { + "datasource": "Prometheus", + "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. 
A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leader's offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention. The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 80, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{namespace=\"$ns\",pod=~\"$pod\"}[5m])", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IsrShrinks per Sec", + "type": "timeseries" + }, + { + "datasource":
"Prometheus", + "description": "The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leader's offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention. The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 83, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, +
"pluginVersion": "8.1.3", + "targets": [ + { + "expr": "rate(kafka_server_replicamanager_isrexpandspersec{namespace=\"$ns\",pod=~\"$pod\"}[5m])", + "hide": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IsrExpands per Sec", + "type": "timeseries" + } + ], + "title": "Isr Shrinks / Expands", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 53, + "panels": [ + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 55, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_log_log_size{namespace=\"$ns\",pod=~\"$pod\"}) by (topic)", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Log size per Topic", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 56, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_log_log_size{namespace=\"$ns\",pod=~\"$pod\"}) by (instance)", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Log size per Broker", + "type": "timeseries" + } + ], + "title": "Logs size", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 58, + "panels": [ + { + "datasource": "Prometheus", + "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. 
The request queue size should match the number of connections.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 60, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Producer - RequestQueueTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. 
In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 61, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Producer - LocalTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. 
Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 38 + }, + "id": 62, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Producer - RemoteTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply there aren't enough network threads or the network can't dequeue responses quickly enough, causing back pressure in the response queue.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, +
"tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 38 + }, + "id": 63, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Producer - ResponseQueueTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network can't dequeue responses of the TCP socket as quickly as they're being created.
If the network buffer gets full, Kafka will block.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 38 + }, + "id": 64, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Producer - ResponseSendTimeMs", + "type": "timeseries" + } + ], + "title": "Producer Performance", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 33 + }, + "id": 68, + "panels": [ + { + "datasource": "Prometheus", + "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isn't large enough.
The request queue size should match the number of connections.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 69, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer - RequestQueueTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. 
In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 70, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer - LocalTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. 
Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 39 + }, + "id": 71, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer - RemoteTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply there aren't enough network threads or the network can't dequeue responses quickly enough, causing back pressure in the response queue.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, +
"tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 39 + }, + "id": 72, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer - ResponseQueueTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network can't dequeue responses of the TCP socket as quickly as they're being created.
If the network buffer gets full, Kafka will block.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 39 + }, + "id": 73, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer - ResponseSendTimeMs", + "type": "timeseries" + } + ], + "title": "Consumer Performance", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 34 + }, + "id": 66, + "panels": [ + { + "datasource": "Prometheus", + "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isn't large enough.
The request queue size should match the number of connections.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 31 + }, + "id": 74, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "FetchFollower - RequestQueueTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. 
In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 31 + }, + "id": 75, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "FetchFollower - LocalTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. 
Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 0, + "y": 40 + }, + "id": 76, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "FetchFollower - RemoteTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply there aren't enough network threads or the network can't dequeue responses quickly enough, causing back pressure in the response queue.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend":
false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 8, + "y": 40 + }, + "id": 77, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "FetchFollower - ResponseQueueTimeMs", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network can't dequeue responses of the TCP socket as quickly as they're being created.
If the network buffer gets full, Kafka will block.\n", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 8, + "x": 16, + "y": 40 + }, + "id": 78, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "hide": false, + "legendFormat": "{{instance}} - {{quantile}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "FetchFollower - ResponseSendTimeMs", + "type": "timeseries" + } + ], + "title": "Fetch Follower Performance", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 102, + "panels": [ + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { 
+ "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 32 + }, + "id": 98, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_socketservermetrics_connection_count{ namespace=\"$ns\", pod=~\"$pod\"}) by (listener)", + "interval": "", + "legendFormat": "{{listener}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections count per listener", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 32 + }, + "id": 100, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_socketservermetrics_connection_count{ namespace=\"$ns\", pod=~\"$pod\"}) by (instance)", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections count per broker", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 40 + }, + "id": 104, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{ namespace=\"$ns\", pod=~\"$pod\"}) by (listener)", + "interval": "", + "legendFormat": "{{listener}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Connections creation rate per listener", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 40 + }, + "id": 106, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{ namespace=\"$ns\", pod=~\"$pod\"}) by (instance)", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections creation rate per instance", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + 
"showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 48 + }, + "id": 108, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{ namespace=\"$ns\", pod=~\"$pod\"}) by (listener)", + "interval": "", + "legendFormat": "{{listener}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections close rate per listener", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 48 + }, + "id": 110, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": 
"table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{ namespace=\"$ns\", pod=~\"$pod\"}) by (instance)", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections close rate per instance", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Tracks the amount of time Acceptor is blocked from accepting connections. See KIP-402 for more details.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 112, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_network_acceptor_acceptorblockedpercent{ namespace=\"$ns\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{instance}} - {{listener}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Acceptor Blocked Percentage", + "type": 
"timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 114, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_socketservermetrics_connections{ namespace=\"$ns\", pod=~\"$pod\"}) by (client_software_name, client_software_version)", + "interval": "", + "legendFormat": "{{client_software_name}} {{client_software_version}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections per client version", + "type": "timeseries" + } + ], + "title": "Connections", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 120, + "panels": [ + { + "datasource": "Prometheus", + "description": "Number of consumer groups per group coordinator", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 33 + }, + "id": 116, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{ namespace=\"$ns\", pod=~\"$pod\"}", + "instant": false, + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer groups number per coordinator", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Number of consumer groups per state", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { +
"color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 33 + }, + "id": 118, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{ namespace=\"$ns\", pod=~\"$pod\"})", + "instant": false, + "interval": "", + "legendFormat": "stable", + "refId": "A" + }, + { + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{ namespace=\"$ns\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "preparing-rebalance", + "refId": "B" + }, + { + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{ namespace=\"$ns\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "dead", + "refId": "C" + }, + { + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{ namespace=\"$ns\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "completing-rebalance", + "refId": "D" + }, + { + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{ namespace=\"$ns\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "empty", + "refId": "E" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Nb consumer groups per state", + "type": "timeseries" + } + ], + "title": "Group Coordinator", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 46, + "panels": [ + { + "datasource": "Prometheus", + "description": "The number of messages produced converted to match the log.message.format.version.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": 
"auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 34 + }, + "id": 48, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace=\"$ns\",pod=~\"$pod\"})", + "hide": false, + "interval": "", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of produced message conversion", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The number of messages consumed converted at consumer to match the log.message.format.version.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 34 + }, + "id": 51, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace=\"$ns\",pod=~\"$pod\"})", + "hide": false, + "interval": "", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of consumed message conversion", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": null, + "description": "Number of connections per client version", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 34 + }, + "id": 96, + "interval": null, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "targets": [ + { + "expr": "sum(kafka_server_socketservermetrics_connections{ namespace=\"$ns\", pod=~\"$pod\"}) by (client_software_name, client_software_version) ", + "interval": "", + "legendFormat": "{{client_software_name}} - {{client_software_version}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Client version
repartition", + "type": "piechart" + } + ], + "title": "Message Conversion", + "type": "row" + } + ], + "refresh": "1m", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "dev", + "value": "dev" + }, + "datasource": "Prometheus", + "definition": "label_values(namespace)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "Prometheus-namespace-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(kafka_server_kafkaserver_brokerstate{namespace=\"$ns\"}, pod)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Pod", + "multi": true, + "name": "pod", + "options": [], + "query": { + "query": "label_values(kafka_server_kafkaserver_brokerstate{namespace=\"$ns\"}, pod)", + "refId": "Prometheus-pod-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": "", + "current": { + "selected": true, + "text": [ + "0.95" + ], + "value": [ + "0.95" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(quantile)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Percentile", + "multi": true, + "name": "percentile", + "options": [], + "query": { + "query": "label_values(quantile)", + "refId": "Prometheus-percentile-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + 
"tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka cluster", + "uid": "qu-QZdfZz", + "version": 2 +} \ No newline at end of file diff --git a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json new file mode 100644 index 00000000..f0318d2a --- /dev/null +++ b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json @@ -0,0 +1,5618 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Monitor Apache Kafka Connect", + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 5, + "iteration": 1632255569594, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 199, + "panels": [], + "title": "General", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 212, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + 
"textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Tasks Total", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 213, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Tasks Running", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 215, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": 
"sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Tasks Paused", + "transformations": [], + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 214, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Tasks Failed", + "transformations": [], + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 216, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": 
"sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Tasks Unassigned", + "transformations": [], + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [], + "noValue": "0", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "purple", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 217, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Tasks Destroyed", + "transformations": [], + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "paused" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "running" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#73BF69", + 
"mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 227, + "interval": null, + "links": [], + "maxDataPoints": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.0.5", + "targets": [ + { + "expr": "sum (kafka_connect_connector_metrics{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status=\"running\"})", + "instant": true, + "interval": "", + "legendFormat": "running", + "refId": "B" + }, + { + "expr": "sum (kafka_connect_connector_metrics{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status=\"stopped\"})", + "instant": true, + "interval": "", + "legendFormat": "stopped", + "refId": "A" + }, + { + "expr": "sum (kafka_connect_connector_metrics{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status=\"paused\"})", + "instant": true, + "interval": "", + "legendFormat": "paused", + "refId": "C" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connector repartition per status", + "type": "piechart" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "destroyed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#B877D9", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": 
"byName", + "options": "paused" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "unassigned" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FADE2A", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 219, + "interval": null, + "links": [], + "maxDataPoints": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "values": [ + "value", + "percent" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.0.5", + "targets": [ + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "running", + "refId": "A" + }, + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "failed", + "refId": "B" + }, + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "paused", + "refId": "C" + }, + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "unassigned", + "refId": "D" + }, + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "instant": true, + "interval": "", + "legendFormat": "destroyed", + "refId": "E" + } + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Task repartition per status", + "type": "piechart" + }, + { + "datasource": "Prometheus", + "description": "Status of connectors over time", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "paused" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/stopped.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#F2495C", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/paused.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FF9830", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/running.*/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#73BF69", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 228, + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, 
+ "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum (kafka_connect_connector_metrics{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status!=\"\"}) by (status) ", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Status of connectors", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Status of tasks over time", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "destroyed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "purple", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "failed" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "paused" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "running" + }, + "properties": [ + { + "id": "color", + "value": { + 
"fixedColor": "green", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "unassigned" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 226, + "links": [], + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "running", + "refId": "A" + }, + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "failed", + "refId": "B" + }, + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "paused", + "refId": "C" + }, + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "destroyed", + "refId": "D" + }, + { + "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, 
+ "legendFormat": "unassigned", + "refId": "E" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Status of tasks", + "type": "timeseries" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 221, + "panels": [], + "title": "System", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 18 + }, + "id": 223, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "rate(process_cpu_seconds_total{namespace=\"$ns\",pod=~\"$pod\"}[5m])", + "interval": "", + "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "Memory", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", 
+ "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 18 + }, + "id": 224, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",pod=~\"$pod\"})", + "interval": "", + "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", + "refId": "A" + }, + { + "expr": "jvm_memory_bytes_max{namespace=\"$ns\",pod=~\"$pod\",area=\"heap\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "JVM Memory Used", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "% time in GC", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 2, + 
"links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 18 + }, + "id": 225, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",pod=~\"$pod\"}[5m]))", + "interval": "", + "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "JVM GC time", + "type": "timeseries" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 97, + "panels": [ + { + "columns": [], + "datasource": "Prometheus", + "fontSize": "90%", + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 146, + "pageSize": 100, + "showHeader": true, + "sort": { + "col": 7, + "desc": true + }, + "styles": [ + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "__name__", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 
0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "env", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "instance", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "client_id", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Startup time", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "MMMM D, YYYY LT", + "decimals": 2, + "mappingType": 1, + "pattern": "start_time_ms", + "thresholds": [], + "type": "date", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #B", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 
0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #A", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Connector Count", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #C", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Connector Startup Success Total", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #D", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Connector Startup Failure Total", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #E", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Number of rebalances", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #F", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Average time of Rebalances", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #G", + "thresholds": [], + "type": "number", + "unit": "ms", + "valueMaps": [ + { + "text": "0", + "value": "NaN" + }, + { + 
"text": "N/A", + "value": "null" + } + ] + }, + { + "alias": "Time since last rebalance", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #H", + "thresholds": [], + "type": "number", + "unit": "ms" + }, + { + "alias": "Worker instance", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "instance", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Number of tasks", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #I", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Task Startup Success ", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #J", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Task Startup Failure", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + 
"mappingType": 1, + "pattern": "Value #K", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "align": "right", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "kafka_connect_app_info{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\",start_time_ms!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "expr": "kafka_connect_app_info{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\",version!=\"\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_count{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "I" + }, + { + "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "J" + }, + { + "expr": "sum by 
(instance) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace=\"$ns\",pod=~\"$pod\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "K" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect Worker", + "transform": "table", + "transparent": true, + "type": "table-old" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Average number of network operations (reads or writes) on all connections per second", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 35 + }, + "hiddenSeries": false, + "id": 95, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_network_io_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network IO Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ops", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": 
null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Bytes per second read off all sockets", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 35 + }, + "hiddenSeries": false, + "id": 91, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_incoming_byte_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Incoming Byte Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Average number of 
outgoing bytes sent per second to all servers", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 35 + }, + "hiddenSeries": false, + "id": 171, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Outgoing Byte Rate", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Current number of active connections", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 42 + }, + "hiddenSeries": false, + "id": 169, + "legend": { + "avg": 
false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_connection_count{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Current number of active connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "connections", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Connections that failed authentication", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 42 + }, + "hiddenSeries": false, + "id": 170, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + 
"alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_failed_authentication_total{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Failed authentication connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "connections", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Connections that were successfully authenticated using SASL or SSL", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 42 + }, + "hiddenSeries": false, + "id": 174, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 
10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_successful_authentication_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Successful authentication connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "connections", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Average number of requests sent per second", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 49 + }, + "hiddenSeries": false, + "id": 172, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_request_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", +
"interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Average number of requests", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Responses received and sent per second", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 49 + }, + "hiddenSeries": false, + "id": 173, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_response_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Responses received and sent", + "tooltip": { + "shared": true, + 
"sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "reqps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "description": "Fraction of time the I/O thread spent doing I/O", + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 49 + }, + "hiddenSeries": false, + "id": 93, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "percentage": false, + "pluginVersion": "8.1.3", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "kafka_connect_connect_metrics_io_ratio{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{instance}}-{{client_id}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "IO Ratio", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + 
}, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Connect Worker", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 132, + "panels": [ + { + "columns": [], + "datasource": "Prometheus", + "fontSize": "110%", + "gridPos": { + "h": 11, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 129, + "pageSize": 100, + "showHeader": true, + "sort": { + "col": 8, + "desc": true + }, + "styles": [ + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Time", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "__name__", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "class", + "align": "left", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "connector_class", + "preserveFormat": false, + "thresholds": [], + "type": "string", + "unit": "short", + "valueMaps": [] + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "env", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + 
"align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "instance", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "job", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Nb of Tasks destroyed", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "#B877D9", + "#B877D9" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "mappingType": 1, + "pattern": "Value #B", + "thresholds": [ + "1" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #A", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #C", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #D", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "", + "align": "auto", + 
"colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "#F2495C" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "status", + "thresholds": [ + "2" + ], + "type": "string", + "unit": "short", + "valueMaps": [ + { + "text": "running", + "value": "1" + }, + { + "text": "paused", + "value": "2" + }, + { + "text": "stopped", + "value": "3" + } + ] + }, + { + "alias": "name", + "align": "left", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "connector", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "type", + "align": "left", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "connector_type", + "thresholds": [], + "type": "string", + "unit": "short" + }, + { + "alias": "version", + "align": "left", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "connector_version", + "thresholds": [], + "type": "number", + "unit": "short" + }, + { + "alias": "Nb of tasks", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #E", + "thresholds": [ + "0", + "1" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Nb of Tasks running", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + 
"dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #F", + "thresholds": [ + "0", + "1" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "Nb of Tasks failed", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "#F2495C", + "#F2495C" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "link": false, + "mappingType": 1, + "pattern": "Value #G", + "preserveFormat": false, + "thresholds": [ + "1" + ], + "type": "number", + "unit": "short", + "valueMaps": [ + { + "text": "0", + "value": "null" + } + ] + }, + { + "alias": "Nb of Tasks paused", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "#FF9830", + "#FF9830" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #H", + "thresholds": [ + "1" + ], + "type": "number", + "unit": "short", + "valueMaps": [ + { + "text": "0", + "value": "null" + } + ] + }, + { + "alias": "", + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 2, + "mappingType": 1, + "pattern": "Value #I", + "thresholds": [], + "type": "hidden", + "unit": "short" + }, + { + "alias": "Nb of Tasks unassigned", + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "#FADE2A", + "#FADE2A" + ], + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "decimals": 0, + "mappingType": 1, + "pattern": "Value #J", + "thresholds": [ + "1" + ], + "type": "number", + "unit": "short" + }, + { + "alias": "", + "align": "right", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": 
"label_replace(label_replace(label_replace(kafka_connect_connector_info{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status!=\"\"}, \"status\", \"1\", \"status\", \"running\"), \"status\", \"2\", \"status\", \"paused\"), \"status\", \"3\", \"status\", \"stopped\")", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "I" + }, + { + "expr": "kafka_connect_connector_info{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",connector_type!=\"\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + }, + { + "expr": "kafka_connect_connector_info{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",connector_version!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "C" + }, + { + "expr": "kafka_connect_connector_info{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",connector_class!=\"\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "D" + }, + { + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "E" + }, + { + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "F" + }, + { + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "G" + }, + { + "expr": "sum by (connector) 
(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "H" + }, + { + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_destroyed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "B" + }, + { + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_unassigned_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "J" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connectors", + "transform": "table", + "transformations": [], + "type": "table-old" + } + ], + "title": "Connector details", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 234, + "panels": [ + { + "datasource": "Prometheus", + "description": "Rebalances average time", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "opacity", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": 
{ + "h": 8, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 209, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace=\"$ns\",pod=~\"$pod\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalances average time", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Time since last rebalance", + "fieldConfig": { + "defaults": { + "decimals": 0, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 230, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "fieldOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "repeat": "instance", + "targets": [ + { + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",pod=~\"$pod\",job=\"connect\"} >= 0", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "($instance) Time since last rebalance ", + 
"type": "stat" + } + ], + "title": "Rebalances", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 29 + }, + "id": 112, + "panels": [ + { + "datasource": "Prometheus", + "description": "Average size of the batches processed by the connector", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 30 + }, + "id": 113, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_connector_task_metrics_batch_size_avg{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size Average", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Maximum size of the batches processed by the connector", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 30 + }, + "id": 114, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size Max", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Average percentage of the task’s offset commit attempts that succeeded", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 37 + }, + "id": 115, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset commit success percentage", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The average time in milliseconds taken by this task to commit offsets", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + 
"w": 12, + "x": 12, + "y": 37 + }, + "id": 116, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset commit Average Time", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The fraction of time this task has spent in the running state.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 44 + }, + "id": 117, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": 
"kafka_connect_connector_task_metrics_running_ratio{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Running ratio", + "type": "timeseries" + } + ], + "title": "Task metrics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 201, + "panels": [ + { + "datasource": "Prometheus", + "description": "Total number of failures seen by task", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 31 + }, + "id": 203, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_task_error_metrics_total_record_failures{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": 
"{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total record failures", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Total number of errors seen by task", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 31 + }, + "id": 205, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_task_error_metrics_total_record_errors{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total record errors", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Total number of records skipped by task", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 31 + }, + "id": 206, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_task_error_metrics_total_records_skipped{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total records skipped", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The number of messages that were logged either to the dead letter queue or with Log4j", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { +
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 38 + }, + "id": 208, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_task_error_metrics_total_errors_logged{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total errors logged", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Total number of retries made by task", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 38 + }, + 
"id": 207, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_task_error_metrics_total_retries{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total retries", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Number of produce requests to the dead letter queue", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 45 + }, + "id": 202, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + 
"format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Dead letter queue Produce requests", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Number of produce requests to the dead letter queue", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 45 + }, + "id": 204, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Dead letter queue Produce requests", + "type": "timeseries" + } + ], + "title": "Task Errors metrics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 139, + "panels": [ + { + "datasource": "Prometheus", + "description": "The average time in milliseconds taken by this task to poll for a batch of source records", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 32 + }, + "id": 140, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Batch Average time", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The maximum time in milliseconds taken by this task to poll for a batch of source records", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 32 + }, + "id": 141, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Batch Max time", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The average per-second number of records produced/polled (before transformation) by this task belonging to the named source connector in this worker.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 39 + }, + "id": 144, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Poll rate", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The average per-second number of records output from the transformations and written to Kafka for this task belonging to the named source connector in this worker. 
This is after transformations are applied and excludes any records filtered out by the transformations.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 39 + }, + "id": 143, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Write rate", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The average number of records that have been produced by this task but not yet completely written to Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 46 + }, + "id": 142, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_source_task_metrics_source_record_active_count_avg{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Active Count average", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The maximum number of records that have been produced by this task but not yet completely written to Kafka.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + 
"mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 46 + }, + "id": 145, + "links": [], + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_source_task_metrics_source_record_active_count_max{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Active Count max", + "type": "timeseries" + } + ], + "title": "Source metrics", + "type": "row" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 134, + "panels": [ + { + "datasource": "Prometheus", + "description": "The number of topic partitions assigned to this task belonging to the named sink connector in this worker.", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], 
+ "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 33 + }, + "id": 135, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_sink_task_metrics_partition_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Partition Count", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The average time in milliseconds taken by this task to put a batch of sink records", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 33 + }, + "id": 136, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode":
"single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Batch Average time", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "The maximum time in milliseconds taken by this task to put a batch of sink records", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 33 + }, + "id": 137, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{connector}}-{{task}}", + "refId": "A" + } +
], + "timeFrom": null, + "timeShift": null, + "title": "Put Batch Max time", + "type": "timeseries" + } + ], + "title": "Sink metrics", + "type": "row" + } + ], + "refresh": "1m", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { + "selected": false, + "text": "dev", + "value": "dev" + }, + "datasource": "Prometheus", + "definition": "label_values(namespace)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "Prometheus-ns-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": ".+", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(kafka_connect_cluster_id)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Cluster ID", + "multi": true, + "name": "kafka_connect_cluster_id", + "options": [], + "query": { + "query": "label_values(kafka_connect_cluster_id)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": "Prometheus", + "definition": "label_values(kafka_connect_app_info{namespace=\"$ns\"},pod)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Pod", + "multi": false, + "name": "pod", + "options": [], + "query": { + "query": "label_values(kafka_connect_app_info{namespace=\"$ns\"},pod)", + "refId": "Prometheus-pod-Variable-Query" + }, + "refresh": 1, + "regex": "", +
"skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(kafka_connect_connector_task_metrics_pause_ratio{namespace=\"$ns\"},connector)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Connector name", + "multi": true, + "name": "connector", + "options": [], + "query": { + "query": "label_values(kafka_connect_connector_task_metrics_pause_ratio{namespace=\"$ns\"},connector)", + "refId": "Prometheus-connector-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Kafka Connect cluster", + "uid": "AEaSQ97mz", + "version": 1 +} \ No newline at end of file diff --git a/cfk-prometheus-grafana/grafana/kafka-topics.json b/cfk-prometheus-grafana/grafana/kafka-topics.json new file mode 100644 index 00000000..9906db03 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/kafka-topics.json @@ -0,0 +1,1061 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 4, + "iteration": 1647426704713, + "links": [], + "panels": [ + { + 
"collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 19, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "datasource": null, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "orange", + "value": 1000 + }, + { + "color": "red", + "value": 10000 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 9, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total # of Topics", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 13, + "x": 
4, + "y": 1 + }, + "id": 2, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "interval": "", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 7, + "x": 17, + "y": 1 + }, + "id": 17, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "topk(10, sum(kafka_log_log_size{namespace=\"$ns\",topic=~\"$topic\"}) by (topic))", + "interval": "", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Log size", + "type": "timeseries" + }, 
+ { + "datasource": null, + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 10000 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 6 + }, + "id": 11, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_controller_kafkacontroller_globalpartitioncount{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total # of Partitions", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 3, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + 
"pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "interval": "", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 11 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "interval": "", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 20 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$ns\", topic=~\"$topic\"}[5m])) by (topic))", + "interval": "", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Request per sec", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 20 + }, + "id": 15, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$ns\",topic=~\"$topic\"}[5m])) by (topic))", + "interval": "", + "legendFormat": "{{topic}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Request per sec", + "type": "timeseries" + }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 21, + "panels": [ + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "partition" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "offset" + }, + "properties": [ + { + "id": "custom.width", + "value": 137 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "instance" + }, + "properties": [ + { + "id": "custom.width", + "value": 155 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "topic" + }, + "properties": [ + { + "id": "custom.width", + "value": 294 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 6, + "options": { + "showHeader": true, 
+ "sortBy": [ + { + "desc": false, + "displayName": "partition" + } + ] + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "kafka_log_log_logstartoffset{namespace=\"$ns\",topic=\"$topic\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Start Offset", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "env": true, + "instance": false, + "job": true + }, + "indexByName": { + "Time": 0, + "Value": 7, + "__name__": 1, + "env": 2, + "instance": 3, + "job": 4, + "partition": 6, + "topic": 5 + }, + "renameByName": { + "Value": "offset" + } + } + } + ], + "type": "table" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "custom": { + "align": null, + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "partition" + }, + "properties": [ + { + "id": "custom.width", + "value": 103 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "offset" + }, + "properties": [ + { + "id": "custom.width", + "value": 105 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "topic" + }, + "properties": [ + { + "id": "custom.width", + "value": 289 + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 7, + "options": { + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "partition" + } + ] + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "kafka_log_log_logendoffset{namespace=\"$ns\",topic=\"$topic\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } 
+ ], + "timeFrom": null, + "timeShift": null, + "title": "End Offset", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "env": true, + "instance": false, + "job": true + }, + "indexByName": { + "Time": 0, + "Value": 7, + "__name__": 1, + "env": 2, + "instance": 3, + "job": 4, + "partition": 6, + "topic": 5 + }, + "renameByName": { + "Value": "offset" + } + } + } + ], + "type": "table" + } + ], + "title": "Topic offsets", + "type": "row" + } + ], + "refresh": "1m", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "dev", + "value": "dev" + }, + "datasource": "Prometheus", + "definition": "label_values(namespace)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "Prometheus-namespace-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(kafka_log_log_size{namespace=\"$ns\"},topic)", + "description": null, + "error": null, + "hide": 0, + "includeAll": true, + "label": "Topic name", + "multi": true, + "name": "topic", + "options": [], + "query": { + "query": "label_values(kafka_log_log_size,topic)", + "refId": "Prometheus-topic-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + "title": "Kafka Topics", + "uid": 
"vQT4b1-Mz", + "version": 1 +} \ No newline at end of file diff --git a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json new file mode 100644 index 00000000..1c9cffef --- /dev/null +++ b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json @@ -0,0 +1,3586 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 9, + "iteration": 1647515912982, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 29, + "panels": [], + "title": "Overview", + "type": "row" + }, + { + "description": "Average number of active queries per server.", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 18, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "exemplar": false, + "expr": 
"avg(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Active Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of created queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 500 + }, + { + "color": "#d44a3a", + "value": 800 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 20, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "avg(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Running Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of persisted queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 0 + }, + { + "color": "#299c46", + "value": 1 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": 
{ + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 2, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "avg(ksql_ksql_engine_query_stats_num_persistent_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Total Persisted Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of rebalancing queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 16, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Rebalancing Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Number of queries in error state", + "fieldConfig": { +
"defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a", + "value": 2 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 4, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Queries in Error State", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of idle queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 19, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + 
"expr": "sum(ksql_ksql_engine_query_stats_num_idle_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Idle Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "ksql_query" + }, + "properties": [ + { + "id": "custom.width", + "value": 426 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "instance" + }, + "properties": [ + { + "id": "custom.width", + "value": 381 + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 16, + "x": 0, + "y": 5 + }, + "id": 23, + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "ksql_ksql_metrics_ksql_queries_query_status{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"}", + "format": "table", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Queries Status", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Value": true, + "__name__": true, + "env": true, + "job": true, + "ksql_cluster": true + }, + "indexByName": {}, + "renameByName": { + "Time": "", + "__name__": "", + "instance": "", + "ksql_cluster": "", + "ksql_query": "" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of not running queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": 
"null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 5 + }, + "id": 5, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": "sum(ksql_ksql_engine_query_stats_not_running_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Stopped Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Num of running queries", + "fieldConfig": { + "defaults": { + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#299c46", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 1 + }, + { + "color": "#d44a3a", + "value": 5 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 5 + }, + "id": 15, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.4.2", + "targets": [ + { + "expr": 
"sum(ksql_ksql_engine_query_stats_pending_shutdown_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "title": "Currently Shutting Down Queries", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Cluster liveness", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "max": 1, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 9 + }, + "id": 17, + "links": [], + "maxDataPoints": 100, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Cluster liveness", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Message consumed/sec", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + 
"custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Message consumed/sec", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "Message produced/sec", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": 
"off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "irate(ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"}[5m])", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Message produced/sec", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 22 + }, + "id": 33, + "panels": [], + "title": "System", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 23 + }, + "id": 12, + "options": 
{ + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\", pod=~\"$pod\"}[5m])", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "decbytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 23 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",pod=~\"$pod\"})", + "interval": "", + "legendFormat": "Used:{{instance}}", + "refId": "A" + }, + { + "expr": "jvm_memory_bytes_max{namespace=\"$ns\",area=\"heap\"}", + "interval": "", + "legendFormat": "Max:{{instance}}", + "refId": "B" + } + ], + "title": "JVM Memory 
Used", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 4, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 23 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\", pod=~\"$pod\"}[5m]))", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "title": "Time spent in GC", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 31, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 32 + }, + "id": 26, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "legendFormat": "{{thread_id}}_avg", + "refId": "A" + }, + { + "refId": "C" + } + ], + "title": "Poll Latency (Avg)", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + 
"value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 6, + "y": 32 + }, + "id": 35, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{thread_id}}_max", + "refId": "B" + } + ], + "title": "Poll Latency (Max)", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 12, + "y": 32 + }, + "id": 25, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": 
"{{thread_id}}_avg", + "refId": "A" + } + ], + "title": "Process Latency (Avg)", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 18, + "y": 32 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{thread_id}}_max", + "refId": "B" + } + ], + "title": "Process Latency Max", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 43 + }, + "id": 13, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{thread_id}}_avg", + "refId": "A" + } + ], + "title": "Commit Latency (Avg)", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 6, + "y": 43 + }, 
+ "id": 38, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{thread_id}}_avg", + "refId": "A" + } + ], + "title": "Commit Latency (Max)", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 12, + "y": 43 + }, + "id": 27, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "format": "time_series", + "instant": false, + "interval": "", + "legendFormat": "{{thread_id}}_avg", + "refId": "A" + } + ], + "title": "Punctuate Latency (Avg)", 
+ "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/max/" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 5, + 2 + ], + "fill": "dash" + } + } + ] + } + ] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 18, + "y": 43 + }, + "id": 37, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{thread_id}}_max", + "refId": "B" + } + ], + "title": "Punctuate Latency (Max)", + "transformations": [], + "type": "timeseries" + } + ], + "title": "Queries Performance", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 32 + }, + "id": 40, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 33 + }, + "id": 36, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Put Rate", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + 
}, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 6, + "y": 33 + }, + "id": 42, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Put average latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 15, + "y": 33 + }, + "id": 43, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": 
"kafka_streams_stream_state_metrics_put_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Put max latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 44 + }, + "id": 52, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Put if absent rate", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 
0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 6, + "y": 44 + }, + "id": 53, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Put if absent average latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": 
"red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 15, + "y": 44 + }, + "id": 54, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Put if absent max latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 55 + }, + "id": 41, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "interval": "", + 
"legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Fetch Rate", + "transformations": [], + "type": "timeseries" + }, + { + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 6, + "y": 55 + }, + "id": 44, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Fetch average latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": 
{ + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 15, + "y": 55 + }, + "id": 45, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_put_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Fetch max latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 66 + }, + "id": 46, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + 
"displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Delete Rate", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 6, + "y": 66 + }, + "id": 47, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Delete average latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 15, + "y": 66 + }, + "id": 48, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Delete max latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 6, + "x": 0, + "y": 77 + }, + "id": 49, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Restore Rate", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 6, + "y": 77 + }, + "id": 50, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": 
"kafka_streams_stream_state_metrics_restore_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Restore average latency", + "transformations": [], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 11, + "w": 9, + "x": 15, + "y": 77 + }, + "id": 51, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "interval": "", + "legendFormat": "{{thread_id}}", + "refId": "B" + } + ], + "title": "Restore max latency", + "transformations": [], + "type": "timeseries" + } + ], + "title": "StateStore Metric", + "type": "row" + } + ], + "refresh": "1m", + "schemaVersion": 35, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "confluent", + "value": "confluent" + }, + "datasource": { + "type": 
"prometheus", + "uid": "a65Bu5Enk" + }, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "Prometheus-ns-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "confluent.ksqldb_", + "value": "confluent.ksqldb_" + }, + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", + "hide": 0, + "includeAll": false, + "label": "Cluster ID", + "multi": false, + "name": "ksqldb_cluster_id", + "options": [], + "query": { + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", + "refId": "Prometheus-ksqldb_cluster_id-Variable-Query" + }, + "refresh": 2, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "current": { + "selected": false, + "text": "All", + "value": "$__all" + }, + "datasource": { + "type": "prometheus", + "uid": "a65Bu5Enk" + }, + "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,pod)", + "hide": 0, + "includeAll": true, + "label": "Pod", + "multi": true, + "name": "pod", + "options": [], + "query": { + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,pod)", + "refId": "Prometheus-instance-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": 
"", + "title": "ksqlDB cluster", + "uid": "pbx34foGk", + "version": 4, + "weekStart": "" +} \ No newline at end of file diff --git a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json new file mode 100644 index 00000000..bbdc9dd0 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json @@ -0,0 +1,994 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 4, + "iteration": 1632254298743, + "links": [], + "panels": [ + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 19, + "panels": [], + "title": "Schemas", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + { + "color": "green", + "value": 1 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "count(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Schema Registry Instances", + "type": "stat" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + 
"color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 4, + "y": 1 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "sum by(schema_type) (kafka_schema_registry_schemas_created{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "{{schema_type}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Schema registered over time", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 8, + "interval": null, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + 
"tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.3.4", + "targets": [ + { + "exemplar": true, + "expr": "sum by(schema_type)(kafka_schema_registry_schemas_created{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "{{schema_type}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Schemas created", + "type": "piechart" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + } + }, + "decimals": 0, + "mappings": [], + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 9, + "interval": null, + "links": [], + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "bottom", + "values": [ + "value" + ] + }, + "pieType": "pie", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "7.3.4", + "targets": [ + { + "exemplar": true, + "expr": "avg by(schema_type)(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "{{schema_type}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Schemas deleted", + "type": "piechart" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 5 + }, + "id": 25, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": 
"8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "sum(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "instant": true, + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Schemas registered", + "type": "stat" + }, + { + "collapsed": false, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 15, + "panels": [], + "title": "System", + "type": "row" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\"}[5m])*100", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "Used:{{instance}}", + "refId": "A" + }, + { + "expr": "jvm_memory_bytes_max{namespace=\"$ns\",area=\"heap\"}", + "interval": "", + "legendFormat": "Max:{{instance}}", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "JVM Memory Used", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + 
"group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 3, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 23, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\"}[5m]))", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Time spent in GC", + "type": "timeseries" + }, + { + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 17, + "title": "Connections", + "type": "row" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 0, + "y": 18 + }, + "id": 4, + "options": { + "legend": { + "calcs": [ + "mean", + 
"lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_schema_registry_jetty_metrics_connections_active{namespace=\"$ns\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Active Connections", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 8, + "y": 18 + }, + "id": 6, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_schema_registry_jersey_metrics_request_rate{namespace=\"$ns\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Requests Rate", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 8, + "x": 16, + "y": 18 + }, + "id": 24, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "kafka_schema_registry_jersey_metrics_request_latency_99{namespace=\"$ns\"}", + "interval": "", + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Requests latency 99p", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "dev", + "value": "dev" + }, + "datasource": "Prometheus", + "definition": "label_values(namespace)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "Prometheus-ns-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": {}, + "timezone": "", + 
"title": "Schema Registry cluster", + "uid": "9ixzve-Mk", + "version": 2 +} \ No newline at end of file diff --git a/cfk-prometheus-grafana/grafana/zookeeper-cluster.json b/cfk-prometheus-grafana/grafana/zookeeper-cluster.json new file mode 100644 index 00000000..676e1d46 --- /dev/null +++ b/cfk-prometheus-grafana/grafana/zookeeper-cluster.json @@ -0,0 +1,1048 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "id": 5, + "iteration": 1632253434096, + "links": [], + "panels": [ + { + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 22, + "title": "Health Check", + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Quorum Size of Zookeeper ensemble", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "#d44a3a", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 2 + }, + { + "color": "#299c46", + "value": 3 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": 
"count(zookeeper_status_quorumsize{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper nodes online", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of Alive Connections", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 100 + }, + { + "color": "#d44a3a", + "value": 200 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(zookeeper_numaliveconnections{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Alive Connections", + "type": "stat" + }, + { + "datasource": "Prometheus", + "description": "Number of queued requests in the server. 
This goes up when the server receives more requests than it can process", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 10 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 8, + "y": 1 + }, + "id": 7, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "zookeeper_outstandingrequests{namespace=\"$ns\"}", + "interval": "", + "legendFormat": "{{server_id}}:{{member_type}} ({{pod}})", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Outstanding Requests", + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 5 + }, + 
"id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of ZNodes", + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of Watchers", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "dark-green", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 500 + }, + { + "color": "#d44a3a", + "value": 1000 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 5 + }, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "text": {}, + "textMode": "auto" + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Watchers", + "type": "stat" + }, + { + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 20, + "title": "System", + "type": "row" + }, + { + "datasource": "Prometheus", + 
"description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 10 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\"}[5m])*100", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 10 + }, + "id": 13, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\"})", + "interval": "", + "legendFormat": "Used:{{pod}}", + "refId": "A" + }, + { + "expr": "jvm_memory_bytes_max{namespace=\"$env\",area=\"heap\"}", + "interval": "", + "legendFormat": "Max:{{pod}}", + "refId": "B" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "JVM Memory Used", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 3, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 10 + }, + "id": 14, + "options": { + "legend": { + "calcs": [ + "mean", + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": 
"sum without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\"}[5m]))", + "interval": "", + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Time spent in GC", + "type": "timeseries" + }, + { + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 18, + "title": "Request Latency", + "type": "row" + }, + { + "datasource": "Prometheus", + "description": "Amount of time it takes for the server to respond to a client request", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 18 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "zookeeper_minrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "interval": "", + "legendFormat": "{{server_id}}:{{member_type}} ({{pod}})", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency - Minimum", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Amount of time it takes for the server to respond to a client request", + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "decimals": 0, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 18 + }, + "id": 10, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "exemplar": true, + "expr": "zookeeper_avgrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "interval": "", + "legendFormat": "{{server_id}}:{{member_type}} ({{pod}})", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency - Average", + "type": "timeseries" + }, + { + "datasource": "Prometheus", + "description": "Amount of time it takes for the server to respond to a client request", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" 
+ }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 18 + }, + "id": 11, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "8.1.3", + "targets": [ + { + "expr": "zookeeper_maxrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "interval": "", + "legendFormat": "{{server_id}}:{{member_type}} ({{pod}})", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency - Maximum", + "type": "timeseries" + } + ], + "refresh": "1m", + "schemaVersion": 30, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": null, + "current": { + "selected": false, + "text": "dev", + "value": "dev" + }, + "datasource": "Prometheus", + "definition": "label_values(namespace)", + "description": null, + "error": null, + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "Prometheus-ns-Variable-Query" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "", + "title": "Zookeeper cluster", + "uid": "H4xS98vWk", + "version": 1 +} \ No newline at end of file From 78ea7963dd60a25e86911d05aaedd37ab88844da Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Fri, 18 Mar 2022 11:06:12 +0000 Subject: [PATCH 02/28] docs(cfk): add readme --- 
cfk-prometheus-grafana/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 cfk-prometheus-grafana/README.md diff --git a/cfk-prometheus-grafana/README.md b/cfk-prometheus-grafana/README.md new file mode 100644 index 00000000..0edc525c --- /dev/null +++ b/cfk-prometheus-grafana/README.md @@ -0,0 +1,10 @@ +# Prometheus and Grafana stack for CFK (Confluent for Kubernetes) + +## Requirements + +- Prometheus and Grafana deployed on Kubernetes: https://artifacthub.io/packages/helm/prometheus-community/prometheus + +## How to run + +- Include the metrics configuration in the Confluent Platform custom resources, following [this example](./cfk/confluent-platform.yaml). +- Deploy the Grafana dashboards. They are very similar to the ones [here](../jmxexporter-prometheus-grafana), but tweaked with Namespace and Pod variables. From e0a04eb015858a8089af6a351eaa7446362f88fe Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 22 Jun 2022 21:12:53 +0100 Subject: [PATCH 03/28] feat: initial grafanalib row --- grafana-dashboards/Makefile | 2 + grafana-dashboards/confluent-platform.json | 352 +++++++++++++++++++++ grafana-dashboards/confluent-platform.py | 53 ++++ 3 files changed, 407 insertions(+) create mode 100644 grafana-dashboards/Makefile create mode 100644 grafana-dashboards/confluent-platform.json create mode 100644 grafana-dashboards/confluent-platform.py diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile new file mode 100644 index 00000000..13951ed8 --- /dev/null +++ b/grafana-dashboards/Makefile @@ -0,0 +1,2 @@ +all: + generate-dashboard -o confluent-platform.json confluent-platform.py diff --git a/grafana-dashboards/confluent-platform.json b/grafana-dashboards/confluent-platform.json new file mode 100644 index 00000000..f03260a4 --- /dev/null +++ b/grafana-dashboards/confluent-platform.json @@ -0,0 +1,352 @@ +{ + "__inputs": [], + "annotations": { + "list": [] + }, + "description": "Overview of the main health-check metrics from Confluent
Platform components.", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "cacheTimeout": null, + "datasource": "default", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": 4, + "targets": [ + { + "datasource": "grafana", + "expr": "example", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Random Walk", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "default", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { 
+ "calcs": [ + "mean" + ], + "decimals": null, + "limit": null, + "links": [], + "mappings": [], + "max": 100, + "min": 0, + "override": {}, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 80.0, + "yaxis": "left" + } + ] + }, + "title": null, + "unit": "none", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 17, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "repeat": null, + "repeatDirection": null, + "span": 4, + "targets": [ + { + "datasource": "grafana", + "expr": "example", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Random Walk", + "transformations": [], + "transparent": false, + "type": "gauge" + }, + { + "cacheTimeout": null, + "datasource": "prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 8, + "w": 16, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": 4, + "targets": [ + { + "datasource": null, + "expr": "rate(prometheus_http_requests_total[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ handler }}", + "metric": "", + "refId": "A", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Prometheus http requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "showTitle": true, + "title": "Zookeeper" + } + ], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Confluent Platform overview - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py new file mode 100644 index 00000000..f398af47 --- /dev/null +++ b/grafana-dashboards/confluent-platform.py @@ -0,0 +1,53 @@ +from grafanalib.core import ( + Dashboard, TimeSeries, GaugePanel, + Target, GridPos, Row, + OPS_FORMAT +) + +panels = [ + TimeSeries( + title="Random Walk", + 
dataSource='default', + targets=[ + Target( + datasource='grafana', + expr='example', + ), + ], + gridPos=GridPos(h=8, w=16, x=0, y=0), + ), + GaugePanel( + title="Random Walk", + dataSource='default', + targets=[ + Target( + datasource='grafana', + expr='example', + ), + ], + gridPos=GridPos(h=4, w=4, x=17, y=0), + ), + TimeSeries( + title="Prometheus http requests", + dataSource='prometheus', + targets=[ + Target( + expr='rate(prometheus_http_requests_total[5m])', + legendFormat="{{ handler }}", + refId='A', + ), + ], + unit=OPS_FORMAT, + gridPos=GridPos(h=8, w=16, x=0, y=10), + ), + ] + +dashboard = Dashboard( + title="Confluent Platform overview - v2", + description="Overview of the main health-check metrics from Confluent Platform components.", + tags=[ + 'confluent', 'kafka', 'zookeeper', 'kafka-connect', 'schema-registry', 'ksqldb' + ], + timezone="browser", + rows=[Row(title='Zookeeper',showTitle=True,panels=panels)], +).auto_panel_ids() From ee0016afa55b06964e6ffb4ee7c7485ff77ea1f0 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 22 Jun 2022 21:13:14 +0100 Subject: [PATCH 04/28] chore: update gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 1d52ed62..756e7aac 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ metricbeat-elastic-kibana/assets/security/ both-enabled/ .vscode/ utils/testing/output/* -utils/testing/code/*.ipynb \ No newline at end of file +utils/testing/code/*.ipynb +__pycache__/ From 49a6e931d6943db4f29a4150fbe3ffc4778f224b Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Thu, 23 Jun 2022 16:38:24 +0100 Subject: [PATCH 05/28] feat: zookeeper row --- grafana-dashboards/confluent-platform.json | 779 ++++++++++++++------- grafana-dashboards/confluent-platform.py | 98 ++- 2 files changed, 576 insertions(+), 301 deletions(-) diff --git a/grafana-dashboards/confluent-platform.json b/grafana-dashboards/confluent-platform.json index 
f03260a4..9e279c42 100644 --- a/grafana-dashboards/confluent-platform.json +++ b/grafana-dashboards/confluent-platform.json @@ -1,5 +1,14 @@ { - "__inputs": [], + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { "list": [] }, @@ -9,298 +18,498 @@ "hideControls": false, "id": null, "links": [], - "panels": [], - "refresh": "10s", - "rows": [ + "panels": [ { - "collapse": false, + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, "editable": true, - "height": "250px", - "panels": [ - { - "cacheTimeout": null, - "datasource": "default", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": 
true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 16, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" + } + ] }, - "repeat": null, - "repeatDirection": null, - "span": 4, - "targets": [ - { - "datasource": "grafana", - "expr": "example", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" ], - "timeFrom": null, - "timeShift": null, - "title": "Random Walk", - "transformations": [], - "transparent": false, - "type": "timeseries" + "fields": "", + "values": false }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ { - 
"cacheTimeout": null, - "datasource": "default", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "calcs": [ - "mean" - ], - "decimals": null, - "limit": null, - "links": [], - "mappings": [], - "max": 100, - "min": 0, - "override": {}, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 80.0, - "yaxis": "left" - } - ] - }, - "title": null, - "unit": "none", - "values": false - }, - "showThresholdLabels": false, - "showThresholdMarkers": true + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] }, - "gridPos": { - "h": 4, - "w": 4, - "x": 17, - "y": 0 + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + 
"fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Avg. number of ZNodes", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "repeat": null, - "repeatDirection": null, - "span": 4, - "targets": [ - { - "datasource": "grafana", - "expr": "example", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" ], - "timeFrom": null, - "timeShift": null, - "title": "Random Walk", - "transformations": [], - "transparent": false, - 
"type": "gauge" + "fields": "", + "values": false }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ { - "cacheTimeout": null, - "datasource": "prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] + "datasource": null, + "expr": "sum(zookeeper_numaliveconnections{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of number of Alive Connections", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] }, - "gridPos": { - "h": 8, - "w": 16, - "x": 0, - "y": 10 + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + 
"id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" }, - "tooltip": { - "mode": "single" + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } }, - "repeat": null, - "repeatDirection": null, - "span": 4, - "targets": [ - { - "datasource": null, - "expr": "rate(prometheus_http_requests_total[5m])", - "format": "time_series", - "hide": false, - 
"instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ handler }}", - "metric": "", - "refId": "A", - "step": 10, - "target": "" - } + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "last" ], - "timeFrom": null, - "timeShift": null, - "title": "Prometheus http requests", - "transformations": [], - "transparent": false, - "type": "timeseries" + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" } - ], + }, "repeat": null, - "showTitle": true, - "title": "Zookeeper" + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_outstandingrequests{namespace=\"$ns\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "refresh": "10s", + "rows": [], "schemaVersion": 12, "sharedCrosshair": false, "style": "dark", @@ -313,7 +522,35 @@ "ksqldb" ], "templating": { - "list": [] + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": 
"query", + "useTags": false + } + ] }, "time": { "from": "now-1h", diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index f398af47..67be5b20 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -1,53 +1,91 @@ -from grafanalib.core import ( - Dashboard, TimeSeries, GaugePanel, - Target, GridPos, Row, - OPS_FORMAT -) +import grafanalib.core as G + +defaultHeight=5 +statWidth=4 panels = [ - TimeSeries( - title="Random Walk", - dataSource='default', + G.RowPanel( + title='Zookeeper', + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="ZK: Quorum Size", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='count(zookeeper_status_quorumsize{namespace="$ns"})', + ), + ], + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=2.0, color="yellow"), + G.Threshold(index=2, value=3.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=0), + ), + G.Stat( + title="ZK: Avg. 
number of ZNodes", + dataSource='${DS_PROMETHEUS}', targets=[ - Target( - datasource='grafana', - expr='example', + G.Target( + expr='avg(zookeeper_inmemorydatatree_nodecount{namespace="$ns"})', ), ], - gridPos=GridPos(h=8, w=16, x=0, y=0), + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=0), ), - GaugePanel( - title="Random Walk", - dataSource='default', + G.Stat( + title="ZK: Sum of number of Alive Connections", + dataSource='${DS_PROMETHEUS}', targets=[ - Target( - datasource='grafana', - expr='example', + G.Target( + expr='sum(zookeeper_numaliveconnections{namespace="$ns"})', ), ], - gridPos=GridPos(h=4, w=4, x=17, y=0), + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=0), + ), + G.Stat( + title="ZK: Sum of watchers", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(zookeeper_inmemorydatatree_watchcount{namespace="$ns"})', + ), + ], + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=0), ), - TimeSeries( - title="Prometheus http requests", - dataSource='prometheus', + G.TimeSeries( + title="ZK: Outstanding Requests", + dataSource='${DS_PROMETHEUS}', targets=[ - Target( - expr='rate(prometheus_http_requests_total[5m])', - legendFormat="{{ handler }}", - refId='A', + G.Target( + expr='zookeeper_outstandingrequests{namespace="$ns"}', + legendFormat="{{pod}}", ), ], - unit=OPS_FORMAT, - gridPos=GridPos(h=8, w=16, x=0, y=10), + legendDisplayMode="table", + legendCalcs=["max","last"], + legendPlacement="right", + gridPos=G.GridPos(h=defaultHeight, w=8, x=statWidth * 4, y=0), ), ] -dashboard = Dashboard( +dashboard = G.Dashboard( title="Confluent Platform overview - v2", description="Overview of the main health-check metrics from Confluent Platform components.", tags=[ 'confluent', 
'kafka', 'zookeeper', 'kafka-connect', 'schema-registry', 'ksqldb' ], + inputs=[G.DataSourceInput(name="DS_PROMETHEUS",label="Prometheus",pluginId="prometheus",pluginName="Prometheus")], + templating=G.Templating(list=[G.Template(name='ns',label='Namespace',dataSource='Prometheus',query='label_values(namespace)')]), timezone="browser", - rows=[Row(title='Zookeeper',showTitle=True,panels=panels)], + panels=panels, ).auto_panel_ids() From bd030f1b6e7c11b02afc6a6481f5b07b0889fc80 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Fri, 24 Jun 2022 16:12:18 +0100 Subject: [PATCH 06/28] feat: cp dashboard completed --- grafana-dashboards/confluent-platform.json | 2046 +++++++++++++++++++- grafana-dashboards/confluent-platform.py | 361 +++- 2 files changed, 2364 insertions(+), 43 deletions(-) diff --git a/grafana-dashboards/confluent-platform.json b/grafana-dashboards/confluent-platform.json index 9e279c42..a5bf9eed 100644 --- a/grafana-dashboards/confluent-platform.json +++ b/grafana-dashboards/confluent-platform.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Zookeeper", + "title": "Zookeeper cluster", "transformations": [], "transparent": false, "type": "row" @@ -126,7 +126,7 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false @@ -209,7 +209,7 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false @@ -292,7 +292,7 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false @@ -375,7 +375,7 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false @@ -506,42 +506,2008 @@ "transformations": [], "transparent": false, "type": "timeseries" - } - ], - "refresh": "10s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka", - "zookeeper", - "kafka-connect", 
- "schema-registry", - "ksqldb" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "ns", - "options": [], - "query": "label_values(namespace)", + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + 
"repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$ns\"} > 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, 
+ "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + 
"y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Under-Replicated (URP)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": 
null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Under-MinISR", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Offline", + "transformations": [], 
+ "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Schema Registry cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"count(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Registered Schemas", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Deleted Schemas", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": "connect_app", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka Connect cluster: $connect_app", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": 
null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, 
+ "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, 
+ "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + 
"transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", 
+ "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=~\"$connect_app\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": "ksqldb_app", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB cluster: $ksqldb_app", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online Servers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": 
"value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + 
"error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Queries", + "transformations": [], + "transparent": false, + "type": "stat" + } + ], + "refresh": "10s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": 
[], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 2, + "includeAll": false, + "label": "Kafka Connect cluster", + "multi": false, + "name": "connect_app", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\"}, app)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 2, + "includeAll": false, + "label": "ksqlDB cluster", + "multi": false, + "name": "ksqldb_app", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\"},app)", "refresh": 1, "regex": null, "sort": 1, diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index 67be5b20..f3e8b46f 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -3,9 +3,32 @@ defaultHeight=5 statWidth=4 -panels = [ +templating=G.Templating(list=[ + G.Template( + name='ns', + label='Namespace', + dataSource='Prometheus', + query='label_values(namespace)', + ), + G.Template( + name='connect_app', + label='Kafka Connect cluster', + dataSource='Prometheus', + query='label_values(kafka_connect_connect_worker_metrics_connector_count{namespace="$ns"}, app)', + hide=2, + ), + G.Template( + name='ksqldb_app', + label='ksqlDB cluster', + dataSource='Prometheus', + 
query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns"},app)', + hide=2, + ), +]) + +zk_panels = [ G.RowPanel( - title='Zookeeper', + title='Zookeeper cluster', gridPos=G.GridPos(h=1, w=24, x=0, y=0), ), G.Stat( @@ -16,6 +39,7 @@ expr='count(zookeeper_status_quorumsize{namespace="$ns"})', ), ], + reduceCalc='last', thresholds=[ G.Threshold(index=0, value=0.0, color="red"), G.Threshold(index=1, value=2.0, color="yellow"), @@ -31,6 +55,7 @@ expr='avg(zookeeper_inmemorydatatree_nodecount{namespace="$ns"})', ), ], + reduceCalc='last', thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], @@ -44,6 +69,7 @@ expr='sum(zookeeper_numaliveconnections{namespace="$ns"})', ), ], + reduceCalc='last', thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], @@ -57,6 +83,7 @@ expr='sum(zookeeper_inmemorydatatree_watchcount{namespace="$ns"})', ), ], + reduceCalc='last', thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], @@ -78,6 +105,334 @@ ), ] +kafka_panels = [ + G.RowPanel( + title='Kafka cluster', + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.Stat( + title="Kafka: Online Brokers", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='count(kafka_server_replicamanager_leadercount{namespace="$ns"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=1), + ), + G.Stat( + title="Kafka: Active Controller", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='kafka_controller_kafkacontroller_activecontrollercount{namespace="$ns"} > 0', + legendFormat="{{pod}}", + ), + ], + reduceCalc='last', + textMode='value_and_name', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + 
expr='sum(kafka_server_replicamanager_partitioncount{namespace="$ns"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-Replicated (URP)", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_server_replicamanager_underreplicatedpartitions{namespace="$ns"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-MinISR", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_cluster_partition_underminisr{namespace="$ns"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Offline", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace="$ns"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=1), + ), + ] + +sr_panels = [ + G.RowPanel( + title='Schema Registry cluster', + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + ), + G.Stat( + title="SR: Online instances", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='count(kafka_schema_registry_registered_count{namespace="$ns"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=2.0, 
color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=2), + ), + G.Stat( + title="SR: Sum of Registered Schemas", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_schema_registry_registered_count{namespace="$ns"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=2), + ), + G.Stat( + title="SR: Sum of Deleted Schemas", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_schema_registry_schemas_deleted{namespace="$ns"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=2), + ), + ] + +connect_panels = [ + G.RowPanel( + title='Kafka Connect cluster: $connect_app', + gridPos=G.GridPos(h=1, w=24, x=0, y=3), + repeat=G.Repeat(variable='connect_app'), + ), + G.Stat( + title="Connect: Online Workers", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='count(kafka_connect_connect_worker_metrics_connector_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=3), + ), + G.Stat( + title="Connect: Sum of Total Tasks", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=3), + ), + G.Stat( + title="Connect: Sum of Running Tasks", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace="$ns",app=~"$connect_app"})', 
+ ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=3), + ), + G.Stat( + title="Connect: Sum of Paused Tasks", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=3), + ), + G.Stat( + title="Connect: Sum of Failed Tasks", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=3), + ), + G.Stat( + title="Connect: Time since last rebalance", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace="$ns",app=~"$connect_app"} >= 0', + legendFormat="{{pod}}", + ), + ], + reduceCalc='last', + format='clockms', + graphMode='none', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=3), + ), + ] + +ksqldb_panels = [ + G.RowPanel( + title='ksqlDB cluster: $ksqldb_app', + gridPos=G.GridPos(h=1, w=24, x=0, y=4), + repeat=G.Repeat(variable='ksqldb_app'), + ), + G.Stat( + title="ksqlDB: Online Servers", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='count(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + 
reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Active Queries", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Running Queries", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_running_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Rebalancing Queries", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=4), + ), + G.Stat( + title="Connect: Sum of Failed Queries", + dataSource='${DS_PROMETHEUS}', + targets=[ + G.Target( + expr='avg(ksql_ksql_engine_query_stats_error_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc='last', + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=4), + ), + ] + +panels = zk_panels + kafka_panels + sr_panels + connect_panels + ksqldb_panels + dashboard = G.Dashboard( title="Confluent Platform 
overview - v2", description="Overview of the main health-check metrics from Confluent Platform components.", @@ -85,7 +440,7 @@ 'confluent', 'kafka', 'zookeeper', 'kafka-connect', 'schema-registry', 'ksqldb' ], inputs=[G.DataSourceInput(name="DS_PROMETHEUS",label="Prometheus",pluginId="prometheus",pluginName="Prometheus")], - templating=G.Templating(list=[G.Template(name='ns',label='Namespace',dataSource='Prometheus',query='label_values(namespace)')]), + templating=templating, timezone="browser", panels=panels, ).auto_panel_ids() From 17975b409cf0317a278d475e6a33625498bf80e9 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Tue, 28 Jun 2022 20:28:41 +0100 Subject: [PATCH 07/28] feat: kafka request panels --- grafana-dashboards/Makefile | 2 + grafana-dashboards/confluent-platform.json | 4 +- grafana-dashboards/confluent-platform.py | 853 +++++------ grafana-dashboards/kafka-cluster.json | 1525 ++++++++++++++++++++ grafana-dashboards/kafka-cluster.py | 277 ++++ grafana-dashboards/zookeeper-cluster.json | 1269 ++++++++++++++++ grafana-dashboards/zookeeper-cluster.py | 220 +++ 7 files changed, 3729 insertions(+), 421 deletions(-) create mode 100644 grafana-dashboards/kafka-cluster.json create mode 100644 grafana-dashboards/kafka-cluster.py create mode 100644 grafana-dashboards/zookeeper-cluster.json create mode 100644 grafana-dashboards/zookeeper-cluster.py diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index 13951ed8..4bf86dd0 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -1,2 +1,4 @@ all: generate-dashboard -o confluent-platform.json confluent-platform.py + generate-dashboard -o zookeeper-cluster.json zookeeper-cluster.py + generate-dashboard -o kafka-cluster.json kafka-cluster.py diff --git a/grafana-dashboards/confluent-platform.json b/grafana-dashboards/confluent-platform.json index a5bf9eed..b58498b5 100644 --- a/grafana-dashboards/confluent-platform.json +++ 
b/grafana-dashboards/confluent-platform.json @@ -493,7 +493,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", "metric": "", "refId": "", "step": 10, @@ -2420,7 +2420,7 @@ "type": "stat" } ], - "refresh": "10s", + "refresh": "30s", "rows": [], "schemaVersion": 12, "sharedCrosshair": false, diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index f3e8b46f..33f623f3 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -1,435 +1,437 @@ import grafanalib.core as G -defaultHeight=5 -statWidth=4 +defaultHeight = 5 +statWidth = 4 -templating=G.Templating(list=[ - G.Template( - name='ns', - label='Namespace', - dataSource='Prometheus', - query='label_values(namespace)', - ), - G.Template( - name='connect_app', - label='Kafka Connect cluster', - dataSource='Prometheus', - query='label_values(kafka_connect_connect_worker_metrics_connector_count{namespace="$ns"}, app)', - hide=2, - ), - G.Template( - name='ksqldb_app', - label='ksqlDB cluster', - dataSource='Prometheus', - query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns"},app)', - hide=2, - ), -]) - -zk_panels = [ - G.RowPanel( - title='Zookeeper cluster', - gridPos=G.GridPos(h=1, w=24, x=0, y=0), +templating = G.Templating( + list=[ + G.Template( + name="ns", + label="Namespace", + dataSource="Prometheus", + query="label_values(namespace)", ), - G.Stat( - title="ZK: Quorum Size", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='count(zookeeper_status_quorumsize{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="red"), - G.Threshold(index=1, value=2.0, color="yellow"), - G.Threshold(index=2, value=3.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=0), + G.Template( + 
name="connect_app", + label="Kafka Connect cluster", + dataSource="Prometheus", + query='label_values(kafka_connect_connect_worker_metrics_connector_count{namespace="$ns"}, app)', + hide=2, ), - G.Stat( - title="ZK: Avg. number of ZNodes", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='avg(zookeeper_inmemorydatatree_nodecount{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=0), - ), - G.Stat( - title="ZK: Sum of number of Alive Connections", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(zookeeper_numaliveconnections{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=0), - ), - G.Stat( - title="ZK: Sum of watchers", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(zookeeper_inmemorydatatree_watchcount{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=0), - ), - G.TimeSeries( - title="ZK: Outstanding Requests", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='zookeeper_outstandingrequests{namespace="$ns"}', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max","last"], - legendPlacement="right", - gridPos=G.GridPos(h=defaultHeight, w=8, x=statWidth * 4, y=0), + G.Template( + name="ksqldb_app", + label="ksqlDB cluster", + dataSource="Prometheus", + query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns"},app)', + hide=2, ), ] +) + +zk_panels = [ + G.RowPanel( + title="Zookeeper cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="ZK: Quorum Size", + dataSource="${DS_PROMETHEUS}", + targets=[ + 
G.Target( + expr='count(zookeeper_status_quorumsize{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=2.0, color="yellow"), + G.Threshold(index=2, value=3.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=0), + ), + G.Stat( + title="ZK: Avg. number of ZNodes", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(zookeeper_inmemorydatatree_nodecount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=0), + ), + G.Stat( + title="ZK: Sum of number of Alive Connections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(zookeeper_numaliveconnections{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=0), + ), + G.Stat( + title="ZK: Sum of watchers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(zookeeper_inmemorydatatree_watchcount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=0), + ), + G.TimeSeries( + title="ZK: Outstanding Requests", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='zookeeper_outstandingrequests{namespace="$ns"}', + legendFormat="{{pod}} ({{server_id}}:{{member_type}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "last"], + legendPlacement="right", + gridPos=G.GridPos(h=defaultHeight, w=8, x=statWidth * 4, y=0), + ), +] kafka_panels = [ - G.RowPanel( - title='Kafka cluster', - gridPos=G.GridPos(h=1, w=24, x=0, y=1), - ), - G.Stat( - title="Kafka: Online Brokers", - dataSource='${DS_PROMETHEUS}', - targets=[ - 
G.Target( - expr='count(kafka_server_replicamanager_leadercount{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=1), - ), - G.Stat( - title="Kafka: Active Controller", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='kafka_controller_kafkacontroller_activecontrollercount{namespace="$ns"} > 0', - legendFormat="{{pod}}", - ), - ], - reduceCalc='last', - textMode='value_and_name', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(kafka_server_replicamanager_partitioncount{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Under-Replicated (URP)", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(kafka_server_replicamanager_underreplicatedpartitions{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Under-MinISR", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(kafka_cluster_partition_underminisr{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Offline", - dataSource='${DS_PROMETHEUS}', - targets=[ - 
G.Target( - expr='sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=1), - ), - ] + G.RowPanel( + title="Kafka cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.Stat( + title="Kafka: Online Brokers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(kafka_server_replicamanager_leadercount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=1), + ), + G.Stat( + title="Kafka: Active Controller", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_controller_kafkacontroller_activecontrollercount{namespace="$ns"} > 0', + legendFormat="{{pod}}", + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_server_replicamanager_partitioncount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-Replicated (URP)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_server_replicamanager_underreplicatedpartitions{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=1), + ), + G.Stat( + 
title="Kafka: Sum of Partitions Under-MinISR", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_cluster_partition_underminisr{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Offline", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=1), + ), +] sr_panels = [ - G.RowPanel( - title='Schema Registry cluster', - gridPos=G.GridPos(h=1, w=24, x=0, y=2), - ), - G.Stat( - title="SR: Online instances", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='count(kafka_schema_registry_registered_count{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="red"), - G.Threshold(index=1, value=1.0, color="yellow"), - G.Threshold(index=2, value=2.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=2), - ), - G.Stat( - title="SR: Sum of Registered Schemas", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(kafka_schema_registry_registered_count{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=2), - ), - G.Stat( - title="SR: Sum of Deleted Schemas", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(kafka_schema_registry_schemas_deleted{namespace="$ns"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, 
color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=2), - ), - ] + G.RowPanel( + title="Schema Registry cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + ), + G.Stat( + title="SR: Online instances", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(kafka_schema_registry_registered_count{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=2.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=2), + ), + G.Stat( + title="SR: Sum of Registered Schemas", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_schema_registry_registered_count{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=2), + ), + G.Stat( + title="SR: Sum of Deleted Schemas", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_schema_registry_schemas_deleted{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=2), + ), +] connect_panels = [ - G.RowPanel( - title='Kafka Connect cluster: $connect_app', - gridPos=G.GridPos(h=1, w=24, x=0, y=3), - repeat=G.Repeat(variable='connect_app'), - ), - G.Stat( - title="Connect: Online Workers", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='count(kafka_connect_connect_worker_metrics_connector_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=3), - ), - G.Stat( - title="Connect: Sum of Total Tasks", - dataSource='${DS_PROMETHEUS}', 
- targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=3), - ), - G.Stat( - title="Connect: Sum of Running Tasks", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=3), - ), - G.Stat( - title="Connect: Sum of Paused Tasks", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="yellow"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=3), - ), - G.Stat( - title="Connect: Sum of Failed Tasks", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=3), - ), - G.Stat( - title="Connect: Time since last rebalance", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace="$ns",app=~"$connect_app"} >= 0', - legendFormat="{{pod}}", - ), - ], - reduceCalc='last', - format='clockms', - 
graphMode='none', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=3), - ), - ] + G.RowPanel( + title="Kafka Connect cluster: $connect_app", + gridPos=G.GridPos(h=1, w=24, x=0, y=3), + repeat=G.Repeat(variable="connect_app"), + ), + G.Stat( + title="Connect: Online Workers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(kafka_connect_connect_worker_metrics_connector_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=3), + ), + G.Stat( + title="Connect: Sum of Total Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=3), + ), + G.Stat( + title="Connect: Sum of Running Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=3), + ), + G.Stat( + title="Connect: Sum of Paused Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 
3, y=3), + ), + G.Stat( + title="Connect: Sum of Failed Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace="$ns",app=~"$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=3), + ), + G.Stat( + title="Connect: Time since last rebalance", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace="$ns",app=~"$connect_app"} >= 0', + legendFormat="{{pod}}", + ), + ], + reduceCalc="last", + format="clockms", + graphMode="none", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=3), + ), +] ksqldb_panels = [ - G.RowPanel( - title='ksqlDB cluster: $ksqldb_app', - gridPos=G.GridPos(h=1, w=24, x=0, y=4), - repeat=G.Repeat(variable='ksqldb_app'), - ), - G.Stat( - title="ksqlDB: Online Servers", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='count(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=4), - ), - G.Stat( - title="ksqlDB: Sum of Active Queries", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=4), - ), - G.Stat( - title="ksqlDB: Sum of Running Queries", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - 
expr='sum(ksql_ksql_engine_query_stats_running_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=4), - ), - G.Stat( - title="ksqlDB: Sum of Rebalancing Queries", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="yellow"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=4), - ), - G.Stat( - title="Connect: Sum of Failed Queries", - dataSource='${DS_PROMETHEUS}', - targets=[ - G.Target( - expr='avg(ksql_ksql_engine_query_stats_error_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc='last', - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=4), - ), - ] + G.RowPanel( + title="ksqlDB cluster: $ksqldb_app", + gridPos=G.GridPos(h=1, w=24, x=0, y=4), + repeat=G.Repeat(variable="ksqldb_app"), + ), + G.Stat( + title="ksqlDB: Online Servers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Active Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, 
value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Running Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_running_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Rebalancing Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=4), + ), + G.Stat( + title="Connect: Sum of Failed Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(ksql_ksql_engine_query_stats_error_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=4), + ), +] panels = zk_panels + kafka_panels + sr_panels + connect_panels + ksqldb_panels @@ -437,10 +439,23 @@ title="Confluent Platform overview - v2", description="Overview of the main health-check metrics from Confluent Platform components.", tags=[ - 'confluent', 'kafka', 'zookeeper', 'kafka-connect', 'schema-registry', 'ksqldb' + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb", + ], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) ], - 
inputs=[G.DataSourceInput(name="DS_PROMETHEUS",label="Prometheus",pluginId="prometheus",pluginName="Prometheus")], templating=templating, timezone="browser", panels=panels, + refresh='30s', ).auto_panel_ids() diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json new file mode 100644 index 00000000..9a384b9a --- /dev/null +++ b/grafana-dashboards/kafka-cluster.json @@ -0,0 +1,1525 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Health-check", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$ns\"} > 0", 
+ "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Replica Imbalance", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + 
"mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Unclean leader elections", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + 
"justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Rate of Requests/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + 
} + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Under-Replicated (URP)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Under-MinISR", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Offline", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + 
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",type=\"kafka\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",type=\"kafka\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": 
null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",type=\"kafka\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": "Sum of req/sec rates", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{request}}(v{{version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Requests rates", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 
+ }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace=\"$ns\",pod=~\"$broker\",error!=\"NONE\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{error}}@{{request}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Error rates", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request rates", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + 
"text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Broker", + "multi": true, + "name": "broker", + "options": [], + "query": "label_values(kafka_server_replicamanager_leadercount{namespace=\"$ns\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py new file mode 100644 index 00000000..bc0c4b2d --- /dev/null +++ b/grafana-dashboards/kafka-cluster.py @@ -0,0 +1,277 @@ +import grafanalib.core as G + +hcHeight = 5 +statWidth = 4 +tsWidth=8 + +templating = G.Templating( + list=[ + G.Template( + name="ns", + label="Namespace", + dataSource="Prometheus", + query="label_values(namespace)", + ), + G.Template( + name="broker", + label="Broker", + dataSource="Prometheus", + query='label_values(kafka_server_replicamanager_leadercount{namespace="$ns"}, pod)', + multi=True, + includeAll=True, + ), + ] +) + +healthcheck_panels = [ + G.RowPanel( + title="Health-check", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="Kafka: Online Brokers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(kafka_server_replicamanager_leadercount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 0, y=0) + ), + G.Stat( + title="Kafka: Active Controller", + dataSource="${DS_PROMETHEUS}", + targets=[ + 
G.Target( + expr='kafka_controller_kafkacontroller_activecontrollercount{namespace="$ns"} > 0', + legendFormat="{{pod}}", + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 1, y=0), + ), + G.Stat( + title="Kafka: Sum of Replica Imbalance", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 2, y=0), + ), + G.Stat( + title="Kafka: Sum of Unclean leader elections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 3, y=1), + ), + G.Stat( + title="Kafka: Rate of Requests/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(rate(kafka_network_requestmetrics_requestspersec{namespace="$ns",pod=~"$broker"}[5m]))', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=1) + ), + + G.Stat( + title="Kafka: Sum of Partitions", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_server_replicamanager_partitioncount{namespace="$ns",pod=~"$broker"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 0, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-Replicated (URP)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='sum(kafka_server_replicamanager_underreplicatedpartitions{namespace="$ns",pod=~"$broker"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 1, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-MinISR", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_cluster_partition_underminisr{namespace="$ns",pod=~"$broker"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 2, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Offline", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace="$ns",pod=~"$broker"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 3, y=1), + ), +] + +system_base=2 + +system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="Kafka: CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='irate(process_cpu_seconds_total{namespace="$ns",type="kafka"}[5m])', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=system_base), + ), + G.TimeSeries( + title="Kafka: Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(area)(jvm_memory_bytes_used{namespace="$ns",type="kafka"})', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + 
gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=system_base), + ), + G.TimeSeries( + title="Kafka: GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",type="kafka"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*2, y=system_base), + ), +] + +request_base=3 +request=[ + G.TimeSeries( + title="Requests rates", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace="$ns",pod=~"$broker"}[5m]))', + legendFormat="{{request}}(v{{version}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=request_base), + stacking={'mode': 'normal', 'group': 'A'}, + ), + G.TimeSeries( + title="Error rates", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace="$ns",pod=~"$broker",error!="NONE"}[5m]))', + legendFormat="{{error}}@{{request}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=request_base), + stacking={'mode': 'normal', 'group': 'A'}, + ), +] +request_panels = [ + G.RowPanel( + title="Request rates", + description="Sum of req/sec rates", + gridPos=G.GridPos(h=1, w=24, x=0, y=request_base), + collapsed=True, + panels=request + ), +] + +panels = healthcheck_panels + system_panels + request_panels + +dashboard = G.Dashboard( + title="Kafka cluster - v2", + description="Overview of the Kafka cluster", + tags=["confluent", "kafka"], + inputs=[ + G.DataSourceInput( + 
name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh='30s', +).auto_panel_ids() diff --git a/grafana-dashboards/zookeeper-cluster.json b/grafana-dashboards/zookeeper-cluster.json new file mode 100644 index 00000000..10bd026c --- /dev/null +++ b/grafana-dashboards/zookeeper-cluster.json @@ -0,0 +1,1269 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Zookeeper cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Health-check", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + 
}, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + 
}, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Avg. number of ZNodes", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_numaliveconnections{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of number of Alive Connections", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "last" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_outstandingrequests{namespace=\"$ns\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",type=\"zookeeper\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",type=\"zookeeper\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + 
"unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",type=\"zookeeper\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_minrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Minimum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_avgrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Average)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", 
+ "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_maxrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Maximum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Latency", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka", + "zookeeper" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Zookeeper cluster - v2", + "uid": null, + "version": 0 +} diff --git 
a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py new file mode 100644 index 00000000..7941643e --- /dev/null +++ b/grafana-dashboards/zookeeper-cluster.py @@ -0,0 +1,220 @@ +import grafanalib.core as G + +hcHeight = 5 +statWidth = 4 +tsWidth=8 + +templating = G.Templating( + list=[ + G.Template( + name="ns", + label="Namespace", + dataSource="Prometheus", + query="label_values(namespace)", + ), + ] +) + +healthcheck_panels = [ + G.RowPanel( + title="Health-check", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="ZK: Quorum Size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(zookeeper_status_quorumsize{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=2.0, color="yellow"), + G.Threshold(index=2, value=3.0, color="green"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 0, y=0), + ), + G.Stat( + title="ZK: Avg. 
number of ZNodes", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(zookeeper_inmemorydatatree_nodecount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 1, y=0), + ), + G.Stat( + title="ZK: Sum of number of Alive Connections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(zookeeper_numaliveconnections{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 2, y=0), + ), + G.Stat( + title="ZK: Sum of watchers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(zookeeper_inmemorydatatree_watchcount{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 3, y=0), + ), + G.TimeSeries( + title="ZK: Outstanding Requests", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='zookeeper_outstandingrequests{namespace="$ns"}', + legendFormat="{{pod}} ({{server_id}}:{{member_type}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "last"], + legendPlacement="right", + gridPos=G.GridPos(h=hcHeight, w=tsWidth, x=statWidth * 4, y=0), + ), +] + +system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.TimeSeries( + title="ZK: CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='irate(process_cpu_seconds_total{namespace="$ns",type="zookeeper"}[5m])', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=1), + ), + G.TimeSeries( + title="ZK: Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum 
without(area)(jvm_memory_bytes_used{namespace="$ns",type="zookeeper"})', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=1), + ), + G.TimeSeries( + title="ZK: GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",type="zookeeper"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*2, y=1), + ), +] + +# TODO: validate if latency metrics make sense. +# Values are high-watermark of the metric and multiplied by tick-time to represent milliseconds. +latency=[ + G.TimeSeries( + title="ZK: Request Latency (Minimum)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='zookeeper_minrequestlatency{namespace="$ns"} * zookeeper_ticktime', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=2), + ), + G.TimeSeries( + title="ZK: Request Latency (Average)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='zookeeper_avgrequestlatency{namespace="$ns"} * zookeeper_ticktime', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=2), + ), + G.TimeSeries( + title="ZK: Request Latency (Maximum)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='zookeeper_maxrequestlatency{namespace="$ns"} * zookeeper_ticktime', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*2, y=2), + ), + +] +latency_panels = [ + G.RowPanel( + 
title="Latency", + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + collapsed=True, + panels=latency + ), +] + +panels = healthcheck_panels + system_panels + latency_panels + +dashboard = G.Dashboard( + title="Zookeeper cluster - v2", + description="Overview of the Zookeeper cluster", + tags=["confluent", "kafka", "zookeeper"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh='30s', +).auto_panel_ids() From 401b204634436b45465525e1ae8ab9cdc31f0225 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 29 Jun 2022 13:22:51 +0100 Subject: [PATCH 08/28] feat: throughput panels --- grafana-dashboards/kafka-cluster.json | 526 +++++++++++++++++++++++++- grafana-dashboards/kafka-cluster.py | 90 ++++- 2 files changed, 605 insertions(+), 11 deletions(-) diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json index 9a384b9a..2799503d 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/kafka-cluster.json @@ -417,7 +417,7 @@ } ] }, - "unit": "none" + "unit": "reqps" }, "overrides": [] }, @@ -425,7 +425,7 @@ "h": 5, "w": 4, "x": 16, - "y": 1 + "y": 0 }, "height": null, "hideTimeOverride": false, @@ -831,6 +831,172 @@ "transparent": false, "type": "stat" }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], 
+ "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, { "cacheTimeout": null, "collapsed": false, @@ -854,7 +1020,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 11, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, @@ -926,7 +1092,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 12, + "id": 14, "interval": null, "links": [], "maxDataPoints": 100, @@ -1026,7 +1192,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 13, + "id": 15, "interval": null, "links": [], "maxDataPoints": 100, @@ -1126,7 +1292,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 14, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, @@ -1195,7 +1361,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 15, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, @@ -1259,7 +1425,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 16, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, @@ -1362,7 +1528,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 17, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, @@ -1419,6 +1585,348 @@ "transformations": [], "transparent": false, "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": "Bytes in/out per second", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { 
+ "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" } ], "refresh": "30s", diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index bc0c4b2d..d3b2cbb7 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -98,7 +98,8 @@ thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=1) + format='reqps', + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=0) ), G.Stat( @@ -160,6 +161,36 @@ ], gridPos=G.GridPos(h=hcHeight, w=statWidth, 
x=statWidth * 3, y=1), ), + G.Stat( + title="Kafka: Bytes In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format='binBps', + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=1) + ), + G.Stat( + title="Kafka: Bytes Out/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace="$ns",pod=~"$broker"}[5m]))', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format='binBps', + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 5, y=1), + ), ] system_base=2 @@ -256,7 +287,62 @@ ), ] -panels = healthcheck_panels + system_panels + request_panels +throughtput_base = request_base + 1; +throughput = [ + G.TimeSeries( + title="Messages In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=throughtput_base), + ), + G.TimeSeries( + title="Bytes In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=throughtput_base), + ), + G.TimeSeries( + title="Bytes Out/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(topic) 
(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace="$ns",pod=~"$broker"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*2, y=throughtput_base), + ), +] +throughput_panels = [ + G.RowPanel( + title="Throughput", + description="Bytes in/out per second", + gridPos=G.GridPos(h=1, w=24, x=0, y=throughtput_base), + collapsed=True, + panels=throughput + ), +] + +panels = healthcheck_panels + system_panels + request_panels + throughput_panels dashboard = G.Dashboard( title="Kafka cluster - v2", From 9f95e0a9e9c16d5944c699886c91f1e343f61fc3 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 29 Jun 2022 13:39:52 +0100 Subject: [PATCH 09/28] feat: thread utilization panels --- grafana-dashboards/confluent-platform.py | 2 +- grafana-dashboards/kafka-cluster.json | 359 +++++++++++++++++++++-- grafana-dashboards/kafka-cluster.py | 111 +++++-- grafana-dashboards/zookeeper-cluster.py | 21 +- 4 files changed, 438 insertions(+), 55 deletions(-) diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index 33f623f3..4b964402 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -457,5 +457,5 @@ templating=templating, timezone="browser", panels=panels, - refresh='30s', + refresh="30s", ).auto_panel_ids() diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json index 2799503d..88c26035 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/kafka-cluster.json @@ -475,6 +475,89 @@ "transparent": false, "type": "stat" }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": 
"absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "max(sum(kafka_log_log_size{namespace=\"$ns\",pod=~\"$broker\"}) by (pod))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Max Logs Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", @@ -512,7 +595,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 7, + "id": 8, "interval": null, "links": [], "maxDataPoints": 100, @@ -603,7 +686,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 8, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, @@ -694,7 +777,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 9, + "id": 10, "interval": null, "links": [], "maxDataPoints": 100, @@ -785,7 +868,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 10, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, @@ -868,7 +951,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 11, + "id": 12, "interval": null, "links": [], "maxDataPoints": 100, @@ -951,7 +1034,7 @@ }, "height": null, "hideTimeOverride": 
false, - "id": 12, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, @@ -1020,7 +1103,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 13, + "id": 14, "interval": null, "links": [], "maxDataPoints": 100, @@ -1092,7 +1175,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 14, + "id": 15, "interval": null, "links": [], "maxDataPoints": 100, @@ -1192,7 +1275,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 15, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, @@ -1292,7 +1375,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 16, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, @@ -1361,7 +1444,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 17, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, @@ -1425,7 +1508,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 18, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, @@ -1528,7 +1611,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 19, + "id": 20, "interval": null, "links": [], "maxDataPoints": 100, @@ -1609,7 +1692,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 20, + "id": 21, "interval": null, "links": [], "maxDataPoints": 100, @@ -1670,7 +1753,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 21, + "id": 22, "interval": null, "links": [], "maxDataPoints": 100, @@ -1770,7 +1853,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 22, + "id": 23, "interval": null, "links": [], "maxDataPoints": 100, @@ -1870,7 +1953,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 23, + "id": 24, "interval": null, "links": [], "maxDataPoints": 100, @@ -1927,6 +2010,248 @@ "transformations": [], "transparent": false, "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": "Internal thread pools usage", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", 
+ "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$ns\",pod=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network processor usage", + "transformations": [], + "transparent": false, + 
"type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$ns\",pod=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request processor (IO) usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread Utilization", + "transformations": [], + 
"transparent": false, + "type": "row" } ], "refresh": "30s", diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index d3b2cbb7..b341a9bf 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -2,7 +2,7 @@ hcHeight = 5 statWidth = 4 -tsWidth=8 +tsWidth = 8 templating = G.Templating( list=[ @@ -40,7 +40,7 @@ thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 0, y=0) + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 0, y=0), ), G.Stat( title="Kafka: Active Controller", @@ -98,8 +98,25 @@ thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - format='reqps', - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=0) + format="reqps", + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=0), + ), + G.Stat( + title="Kafka: Max Logs Size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='max(sum(kafka_log_log_size{namespace="$ns",pod=~"$broker"}) by (pod))', + legendFormat="{{pod}}", + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="bytes", + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 5, y=0), ), G.Stat( @@ -173,8 +190,8 @@ thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - format='binBps', - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=1) + format="binBps", + gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=1), ), G.Stat( title="Kafka: Bytes Out/Sec", @@ -188,12 +205,12 @@ thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - format='binBps', + format="binBps", gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 5, y=1), ), ] -system_base=2 +system_base = 2 system_panels = [ G.RowPanel( @@ -212,7 +229,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percentunit", - 
gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=system_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=system_base), ), G.TimeSeries( title="Kafka: Memory usage", @@ -226,7 +243,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="bytes", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=system_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=system_base), ), G.TimeSeries( title="Kafka: GC collection", @@ -240,12 +257,12 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percentunit", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*2, y=system_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=system_base), ), ] -request_base=3 -request=[ +request_base = 3 +request_inner = [ G.TimeSeries( title="Requests rates", dataSource="${DS_PROMETHEUS}", @@ -258,8 +275,8 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="reqps", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=request_base), - stacking={'mode': 'normal', 'group': 'A'}, + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=request_base), + stacking={"mode": "normal", "group": "A"}, ), G.TimeSeries( title="Error rates", @@ -273,8 +290,8 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="reqps", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=request_base), - stacking={'mode': 'normal', 'group': 'A'}, + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=request_base), + stacking={"mode": "normal", "group": "A"}, ), ] request_panels = [ @@ -283,12 +300,12 @@ description="Sum of req/sec rates", gridPos=G.GridPos(h=1, w=24, x=0, y=request_base), collapsed=True, - panels=request + panels=request_inner, ), ] -throughtput_base = request_base + 1; -throughput = [ +throughtput_base = request_base + 1 +throughput_inner = [ G.TimeSeries( title="Messages In/Sec", dataSource="${DS_PROMETHEUS}", @@ -301,7 
+318,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="cps", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=throughtput_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=throughtput_base), ), G.TimeSeries( title="Bytes In/Sec", @@ -315,7 +332,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="binBps", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=throughtput_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=throughtput_base), ), G.TimeSeries( title="Bytes Out/Sec", @@ -329,7 +346,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="binBps", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*2, y=throughtput_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=throughtput_base), ), ] throughput_panels = [ @@ -338,11 +355,53 @@ description="Bytes in/out per second", gridPos=G.GridPos(h=1, w=24, x=0, y=throughtput_base), collapsed=True, - panels=throughput + panels=throughput_inner, + ), +] + +thread_base = throughtput_base + 1 +thread_inner = [ + G.TimeSeries( + title="Network processor usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace="$ns",pod=~"$broker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=thread_base), + ), + G.TimeSeries( + title="Request processor (IO) usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace="$ns",pod=~"$broker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=thread_base), + ), +] + +thread_panels = [ 
+ G.RowPanel( + title="Thread Utilization", + description="Internal thread pools usage", + gridPos=G.GridPos(h=1, w=24, x=0, y=thread_base), + collapsed=True, + panels=thread_inner, ), ] -panels = healthcheck_panels + system_panels + request_panels + throughput_panels +panels = healthcheck_panels + system_panels + request_panels + throughput_panels + thread_panels dashboard = G.Dashboard( title="Kafka cluster - v2", @@ -359,5 +418,5 @@ templating=templating, timezone="browser", panels=panels, - refresh='30s', + refresh="30s", ).auto_panel_ids() diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py index 7941643e..440240b2 100644 --- a/grafana-dashboards/zookeeper-cluster.py +++ b/grafana-dashboards/zookeeper-cluster.py @@ -2,7 +2,7 @@ hcHeight = 5 statWidth = 4 -tsWidth=8 +tsWidth = 8 templating = G.Templating( list=[ @@ -111,7 +111,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percentunit", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=1), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=1), ), G.TimeSeries( title="ZK: Memory usage", @@ -125,7 +125,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="bytes", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=1), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=1), ), G.TimeSeries( title="ZK: GC collection", @@ -139,13 +139,13 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percentunit", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*2, y=1), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=1), ), ] # TODO: validate if latency metrics make sense. # Values are high-watermark of the metric and multiplied by tick-time to represent milliseconds. 
-latency=[ +latency = [ G.TimeSeries( title="ZK: Request Latency (Minimum)", dataSource="${DS_PROMETHEUS}", @@ -158,7 +158,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*0, y=2), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=2), ), G.TimeSeries( title="ZK: Request Latency (Average)", @@ -172,7 +172,7 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*1, y=2), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=2), ), G.TimeSeries( title="ZK: Request Latency (Maximum)", @@ -186,16 +186,15 @@ legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", - gridPos=G.GridPos(h=hcHeight*2, w=tsWidth, x=tsWidth*2, y=2), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=2), ), - ] latency_panels = [ G.RowPanel( title="Latency", gridPos=G.GridPos(h=1, w=24, x=0, y=2), collapsed=True, - panels=latency + panels=latency, ), ] @@ -216,5 +215,5 @@ templating=templating, timezone="browser", panels=panels, - refresh='30s', + refresh="30s", ).auto_panel_ids() From 9cfd076639773213de9c617e91a16ab21ad6af17 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 29 Jun 2022 13:56:58 +0100 Subject: [PATCH 10/28] feat: connection rates --- grafana-dashboards/kafka-cluster.json | 820 +++++++++++++++++++++++--- grafana-dashboards/kafka-cluster.py | 182 ++++-- 2 files changed, 868 insertions(+), 134 deletions(-) diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json index 88c26035..c0bdc280 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/kafka-cluster.json @@ -530,7 +530,7 @@ "fields": "", "values": false }, - "textMode": "auto" + "textMode": "value_and_name" }, "repeat": null, "repeatDirection": null, @@ -1425,7 +1425,7 @@ "cacheTimeout": null, "collapsed": true, "datasource": null, - 
"description": "Sum of req/sec rates", + "description": "Bytes in/out per second", "editable": true, "error": false, "fieldConfig": { @@ -1483,10 +1483,7 @@ }, "showPoints": "auto", "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -1496,7 +1493,7 @@ "mode": "absolute", "steps": [] }, - "unit": "reqps" + "unit": "cps" }, "overrides": [] }, @@ -1534,13 +1531,13 @@ "targets": [ { "datasource": null, - "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{request}}(v{{version}})", + "legendFormat": "{{pod}}", "metric": "", "refId": "", "step": 10, @@ -1549,7 +1546,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Requests rates", + "title": "Messages In/Sec", "transformations": [], "transparent": false, "type": "timeseries" @@ -1586,10 +1583,7 @@ }, "showPoints": "auto", "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -1599,7 +1593,7 @@ "mode": "absolute", "steps": [] }, - "unit": "reqps" + "unit": "binBps" }, "overrides": [] }, @@ -1637,13 +1631,13 @@ "targets": [ { "datasource": null, - "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace=\"$ns\",pod=~\"$broker\",error!=\"NONE\"}[5m]))", + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{error}}@{{request}}", + "legendFormat": 
"{{pod}}", "metric": "", "refId": "", "step": 10, @@ -1652,53 +1646,11 @@ ], "timeFrom": null, "timeShift": null, - "title": "Error rates", + "title": "Bytes In/Sec", "transformations": [], "transparent": false, "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request rates", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": "Bytes in/out per second", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", @@ -1741,19 +1693,19 @@ "mode": "absolute", "steps": [] }, - "unit": "cps" + "unit": "binBps" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 4 + "x": 16, + "y": 3 }, "height": null, "hideTimeOverride": false, - "id": 22, + "id": 21, "interval": null, "links": [], "maxDataPoints": 100, @@ -1779,7 +1731,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -1794,11 +1746,53 @@ ], "timeFrom": null, "timeShift": null, - "title": "Messages In/Sec", + "title": "Bytes Out/Sec", "transformations": [], "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + 
"timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": "Internal thread pools usage", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", @@ -1841,14 +1835,14 @@ "mode": "absolute", "steps": [] }, - "unit": "binBps" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 8, + "x": 0, "y": 4 }, "height": null, @@ -1879,7 +1873,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$ns\",pod=~\"$broker\"}", "format": "time_series", "hide": false, "instant": false, @@ -1894,7 +1888,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Bytes In/Sec", + "title": "Network processor usage", "transformations": [], "transparent": false, "type": "timeseries" @@ -1941,14 +1935,14 @@ "mode": "absolute", "steps": [] }, - "unit": "binBps" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 16, + "x": 8, "y": 4 }, "height": null, @@ -1979,7 +1973,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$ns\",pod=~\"$broker\"}", "format": "time_series", "hide": false, "instant": false, @@ -1994,7 +1988,7 @@ ], 
"timeFrom": null, "timeShift": null, - "title": "Bytes Out/Sec", + "title": "Request processor (IO) usage", "transformations": [], "transparent": false, "type": "timeseries" @@ -2006,7 +2000,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Throughput", + "title": "Thread Utilization", "transformations": [], "transparent": false, "type": "row" @@ -2015,7 +2009,7 @@ "cacheTimeout": null, "collapsed": true, "datasource": null, - "description": "Internal thread pools usage", + "description": "Sum of req/sec rates", "editable": true, "error": false, "fieldConfig": { @@ -2073,7 +2067,10 @@ }, "showPoints": "auto", "spanNulls": false, - "stacking": {}, + "stacking": { + "group": "A", + "mode": "normal" + }, "thresholdsStyle": { "mode": "off" } @@ -2083,7 +2080,7 @@ "mode": "absolute", "steps": [] }, - "unit": "percentunit" + "unit": "reqps" }, "overrides": [] }, @@ -2121,13 +2118,13 @@ "targets": [ { "datasource": null, - "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$ns\",pod=~\"$broker\"}", + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{request}}(v{{version}})", "metric": "", "refId": "", "step": 10, @@ -2136,7 +2133,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Network processor usage", + "title": "Requests rates", "transformations": [], "transparent": false, "type": "timeseries" @@ -2173,7 +2170,10 @@ }, "showPoints": "auto", "spanNulls": false, - "stacking": {}, + "stacking": { + "group": "A", + "mode": "normal" + }, "thresholdsStyle": { "mode": "off" } @@ -2183,7 +2183,7 @@ "mode": "absolute", "steps": [] }, - "unit": "percentunit" + "unit": "reqps" }, "overrides": [] }, @@ -2221,13 +2221,13 @@ "targets": [ { "datasource": null, - "expr": 
"1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$ns\",pod=~\"$broker\"}", + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace=\"$ns\",pod=~\"$broker\",error!=\"NONE\"}[5m]))", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{error}}@{{request}}", "metric": "", "refId": "", "step": 10, @@ -2236,7 +2236,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Request processor (IO) usage", + "title": "Error rates", "transformations": [], "transparent": false, "type": "timeseries" @@ -2248,7 +2248,649 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Thread Utilization", + "title": "Request rates", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$ns\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections alive per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$ns\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections creation rate per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + 
"mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$ns\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections close rate per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, 
+ "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$ns\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections alive per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$ns\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections creation rate per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$ns\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections close 
rate per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", "transformations": [], "transparent": false, "type": "row" diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index b341a9bf..3bb636e4 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -106,7 +106,7 @@ dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='max(sum(kafka_log_log_size{namespace="$ns",pod=~"$broker"}) by (pod))', + expr='sum(kafka_log_log_size{namespace="$ns",pod=~"$broker"}) by (pod)', legendFormat="{{pod}}", ), ], @@ -261,7 +261,103 @@ ), ] -request_base = 3 +throughtput_base = system_base + 1 +throughput_inner = [ + G.TimeSeries( + title="Messages In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=throughtput_base), + ), + G.TimeSeries( + title="Bytes In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=throughtput_base), + ), + G.TimeSeries( + title="Bytes Out/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace="$ns",pod=~"$broker"}[5m]))', + legendFormat="{{pod}}", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=throughtput_base), + ), +] +throughput_panels = [ + G.RowPanel( + title="Throughput", + description="Bytes in/out per second", + gridPos=G.GridPos(h=1, w=24, x=0, y=throughtput_base), + collapsed=True, + panels=throughput_inner, + ), +] + +thread_base = throughtput_base + 1 +thread_inner = [ + G.TimeSeries( + title="Network processor usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace="$ns",pod=~"$broker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=thread_base), + ), + G.TimeSeries( + title="Request processor (IO) usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace="$ns",pod=~"$broker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=thread_base), + ), +] +thread_panels = [ + G.RowPanel( + title="Thread Utilization", + description="Internal thread pools usage", + gridPos=G.GridPos(h=1, w=24, x=0, y=thread_base), + collapsed=True, + panels=thread_inner, + ), +] + +request_base = thread_base + 1 request_inner = [ G.TimeSeries( title="Requests rates", @@ -304,104 +400,100 @@ ), ] -throughtput_base = request_base + 1 -throughput_inner = [ + +connection_base = request_base + 1 +connection_inner = [ G.TimeSeries( - title="Messages In/Sec", + title="Sum of Connections alive per Broker", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum without(topic) 
(rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', + expr='sum(kafka_server_socketservermetrics_connection_count{namespace="$ns",pod=~"$broker"}) by (pod)', legendFormat="{{pod}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit="cps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=throughtput_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=connection_base), ), G.TimeSeries( - title="Bytes In/Sec", + title="Sum of Connections creation rate per Broker", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', + expr='sum(kafka_server_socketservermetrics_connection_creation_rate{namespace="$ns",pod=~"$broker"}) by (pod)', legendFormat="{{pod}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit="binBps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=throughtput_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=connection_base), ), G.TimeSeries( - title="Bytes Out/Sec", + title="Sum of Connections close rate per Broker", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace="$ns",pod=~"$broker"}[5m]))', + expr='sum(kafka_server_socketservermetrics_connection_close_rate{namespace="$ns",pod=~"$broker"}) by (pod)', legendFormat="{{pod}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit="binBps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=throughtput_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=connection_base), ), -] -throughput_panels = [ - G.RowPanel( - title="Throughput", - description="Bytes in/out per second", - gridPos=G.GridPos(h=1, w=24, x=0, y=throughtput_base), - collapsed=True, - panels=throughput_inner, + # By 
Listener + G.TimeSeries( + title="Sum of Connections alive per Listener", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_server_socketservermetrics_connection_count{namespace="$ns",pod=~"$broker"}) by (listener)', + legendFormat="{{listener}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=connection_base + 1), ), -] - -thread_base = throughtput_base + 1 -thread_inner = [ G.TimeSeries( - title="Network processor usage", + title="Sum of Connections creation rate per Listener", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace="$ns",pod=~"$broker"}', - legendFormat="{{pod}}", + expr='sum(kafka_server_socketservermetrics_connection_creation_rate{namespace="$ns",pod=~"$broker"}) by (listener)', + legendFormat="{{listener}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=thread_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=connection_base + 1), ), G.TimeSeries( - title="Request processor (IO) usage", + title="Sum of Connections close rate per Listener", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace="$ns",pod=~"$broker"}', - legendFormat="{{pod}}", + expr='sum(kafka_server_socketservermetrics_connection_close_rate{namespace="$ns",pod=~"$broker"}) by (listener)', + legendFormat="{{listener}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=thread_base), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=connection_base + 1), ), -] -thread_panels = [ +] +connection_panels = [ G.RowPanel( - title="Thread Utilization", - 
description="Internal thread pools usage", - gridPos=G.GridPos(h=1, w=24, x=0, y=thread_base), + title="Connections", + gridPos=G.GridPos(h=1, w=24, x=0, y=connection_base), collapsed=True, - panels=thread_inner, + panels=connection_inner, ), ] -panels = healthcheck_panels + system_panels + request_panels + throughput_panels + thread_panels +panels = healthcheck_panels + system_panels + throughput_panels + thread_panels + request_panels + connection_panels dashboard = G.Dashboard( title="Kafka cluster - v2", From b8b258ec8bae7bf54a3f076bf4daf6777cc8ee3f Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 29 Jun 2022 14:04:48 +0100 Subject: [PATCH 11/28] feat: isr panels --- grafana-dashboards/kafka-cluster.json | 244 +++++++++++++++++++++++++- grafana-dashboards/kafka-cluster.py | 41 ++++- 2 files changed, 282 insertions(+), 3 deletions(-) diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json index c0bdc280..bfe32122 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/kafka-cluster.json @@ -538,7 +538,7 @@ "targets": [ { "datasource": null, - "expr": "max(sum(kafka_log_log_size{namespace=\"$ns\",pod=~\"$broker\"}) by (pod))", + "expr": "sum(kafka_log_log_size{namespace=\"$ns\",pod=~\"$broker\"}) by (pod)", "format": "time_series", "hide": false, "instant": false, @@ -2894,6 +2894,248 @@ "transformations": [], "transparent": false, "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": 
null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of ISR Shrinks/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrexpandspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of ISR Expands/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "In-Sync Replicas", + "transformations": [], + "transparent": false, + "type": "row" } ], "refresh": "30s", diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index 3bb636e4..641c265a 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -482,7 +482,6 @@ legendCalcs=["max", "mean", "last"], gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=connection_base + 
1), ), - ] connection_panels = [ G.RowPanel( @@ -493,7 +492,45 @@ ), ] -panels = healthcheck_panels + system_panels + throughput_panels + thread_panels + request_panels + connection_panels +isr_base = connection_base + 2 +isr_inner = [ + G.TimeSeries( + title="Rate of ISR Shrinks/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='rate(kafka_server_replicamanager_isrshrinkspersec{namespace="$ns",pod=~"$broker"}[5m])', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=isr_base), + ), + G.TimeSeries( + title="Rate of ISR Expands/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='rate(kafka_server_replicamanager_isrexpandspersec{namespace="$ns",pod=~"$broker"}[5m])', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=isr_base), + ), +] +isr_panels = [ + G.RowPanel( + title="In-Sync Replicas", + gridPos=G.GridPos(h=1, w=24, x=0, y=isr_base), + collapsed=True, + panels=isr_inner, + ), +] + +panels = healthcheck_panels + system_panels + throughput_panels + thread_panels + request_panels + connection_panels + isr_panels dashboard = G.Dashboard( title="Kafka cluster - v2", From 551aaff99a3291672da170d8ca444e34db8afe3b Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 29 Jun 2022 15:19:56 +0100 Subject: [PATCH 12/28] feat: latency panels --- grafana-dashboards/kafka-cluster.json | 1655 ++++++++++++++++++++++++- grafana-dashboards/kafka-cluster.py | 256 +++- 2 files changed, 1908 insertions(+), 3 deletions(-) diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json index bfe32122..fa8cec89 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/kafka-cluster.json @@ -553,7 +553,7 @@ ], "timeFrom": null, "timeShift": null, - "title": 
"Kafka: Max Logs Size", + "title": "Kafka: Logs Size", "transformations": [], "transparent": false, "type": "stat" @@ -3136,6 +3136,1632 @@ "transformations": [], "transparent": false, "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Local Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Produce: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + 
"editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Producer Request latency", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, 
+ "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + 
"metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, + "type": 
"timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 48, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 49, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Consumer Fetch Request latency", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "height": null, + 
"hideTimeOverride": false, + "id": 50, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 51, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 52, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 53, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 14 + }, + "height": null, + "hideTimeOverride": false, + "id": 54, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 
false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 14 + }, + "height": null, + "hideTimeOverride": false, + "id": 55, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Replica Fetch Request latency", + "transformations": [], + "transparent": false, + "type": "row" } ], "refresh": "30s", @@ -3202,6 +4828,33 @@ "tagsQuery": null, "type": "query", "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\"}, quantile)", + "refresh": 1, + "regex": 
null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false } ] }, diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index 641c265a..7032c3fe 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -20,6 +20,12 @@ multi=True, includeAll=True, ), + G.Template( + name="quantile", + label="Quantile", + dataSource="Prometheus", + query='label_values(kafka_network_requestmetrics_requestqueuetimems{namespace="$ns"}, quantile)', + ), ] ) @@ -102,7 +108,7 @@ gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=0), ), G.Stat( - title="Kafka: Max Logs Size", + title="Kafka: Logs Size", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -530,7 +536,253 @@ ), ] -panels = healthcheck_panels + system_panels + throughput_panels + thread_panels + request_panels + connection_panels + isr_panels +producer_base = isr_base + 1 +producer_inner = [ + G.TimeSeries( + title="Produce: Request Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_requestqueuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=producer_base), + ), + G.TimeSeries( + title="Produce: Local Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_localtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=producer_base), + ), + G.TimeSeries( + title="Produce: Remote Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='kafka_network_requestmetrics_remotetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=producer_base), + ), + G.TimeSeries( + title="Produce: Response Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_responsequeuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=producer_base + 1), + ), + G.TimeSeries( + title="Produce: Response Send Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_responsesendtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=producer_base + 1), + ), +] +producer_panels = [ + G.RowPanel( + title="Producer Request latency", + gridPos=G.GridPos(h=1, w=24, x=0, y=producer_base), + collapsed=True, + panels=producer_inner, + ), +] + +consumer_base = producer_base + 2 +consumer_inner = [ + G.TimeSeries( + title="Fetch: Request Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_requestqueuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=consumer_base), + ), + G.TimeSeries( + title="Fetch: 
Local Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_localtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=consumer_base), + ), + G.TimeSeries( + title="Fetch: Remote Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_remotetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=consumer_base), + ), + G.TimeSeries( + title="Fetch: Response Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_responsequeuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=consumer_base + 1), + ), + G.TimeSeries( + title="Fetch: Response Send Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_responsesendtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=consumer_base + 1), + ), +] +consumer_panels = [ + G.RowPanel( + title="Consumer Fetch Request latency", + gridPos=G.GridPos(h=1, w=24, x=0, y=consumer_base), + collapsed=True, + panels=consumer_inner, + ), +] + +replication_base = consumer_base 
+ 2 +replication_inner = [ + G.TimeSeries( + title="Fetch: Request Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_requestqueuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=replication_base), + ), + G.TimeSeries( + title="Fetch: Local Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_localtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=replication_base), + ), + G.TimeSeries( + title="Fetch: Remote Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_remotetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=replication_base), + ), + G.TimeSeries( + title="Fetch: Response Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_network_requestmetrics_responsequeuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=replication_base + 1), + ), + G.TimeSeries( + title="Fetch: Response Send Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='kafka_network_requestmetrics_responsesendtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{pod}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='ms', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=replication_base + 1), + ), +] +replication_panels = [ + G.RowPanel( + title="Replica Fetch Request latency", + gridPos=G.GridPos(h=1, w=24, x=0, y=replication_base), + collapsed=True, + panels=replication_inner, + ), +] + +panels = healthcheck_panels + system_panels + throughput_panels + thread_panels + request_panels + connection_panels + isr_panels + producer_panels + consumer_panels + replication_panels dashboard = G.Dashboard( title="Kafka cluster - v2", From 350d4d7e49b83c0062f3da20253c2f3c9b2dbf18 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 29 Jun 2022 16:35:33 +0100 Subject: [PATCH 13/28] feat: missing panels for zk and kafka --- grafana-dashboards/kafka-cluster.json | 648 +++++++++++++++++++++- grafana-dashboards/kafka-cluster.py | 116 +++- grafana-dashboards/zookeeper-cluster.json | 579 ++++++++++++++++++- grafana-dashboards/zookeeper-cluster.py | 100 +++- 4 files changed, 1431 insertions(+), 12 deletions(-) diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json index fa8cec89..8d603f44 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/kafka-cluster.json @@ -3674,7 +3674,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Producer Request latency", + "title": "Request latency: Producer", "transformations": [], "transparent": false, "type": "row" @@ -4216,7 +4216,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Consumer Fetch Request latency", + "title": "Request latency: Consumer Fetch", "transformations": [], "transparent": false, "type": "row" @@ -4758,7 +4758,649 @@ "targets": [], "timeFrom": null, 
"timeShift": null, - "title": "Replica Fetch Request latency", + "title": "Request latency: Replica Fetch", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 56, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 57, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_coordinator_group_groupmetadatamanager_numgroups{namespace=\"$ns\",pod=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Groups per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 58, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "stable", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "preparing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "dead", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "completing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "empty", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Groups per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Group Coordinator", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + 
"error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 59, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "opsps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 60, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": 
null, + "title": "Sum of Produce conversion rate per sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "opsps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 61, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace=\"$ns\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Fetch conversion rate per sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": 
null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 62, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connections{namespace=\"$ns\",pod=~\"$broker\"}) by (client_software_name,client_software_version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_software_name}} (v{{client_software_version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections per version", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Message Conversion", "transformations": [], "transparent": false, "type": "row" diff --git 
a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index 7032c3fe..0902e3f5 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -611,7 +611,7 @@ ] producer_panels = [ G.RowPanel( - title="Producer Request latency", + title="Request latency: Producer", gridPos=G.GridPos(h=1, w=24, x=0, y=producer_base), collapsed=True, panels=producer_inner, @@ -693,7 +693,7 @@ ] consumer_panels = [ G.RowPanel( - title="Consumer Fetch Request latency", + title="Request latency: Consumer Fetch", gridPos=G.GridPos(h=1, w=24, x=0, y=consumer_base), collapsed=True, panels=consumer_inner, @@ -775,14 +775,122 @@ ] replication_panels = [ G.RowPanel( - title="Replica Fetch Request latency", + title="Request latency: Replica Fetch", gridPos=G.GridPos(h=1, w=24, x=0, y=replication_base), collapsed=True, panels=replication_inner, ), ] -panels = healthcheck_panels + system_panels + throughput_panels + thread_panels + request_panels + connection_panels + isr_panels + producer_panels + consumer_panels + replication_panels +group_base = replication_base + 2 +group_inner = [ + G.TimeSeries( + title="Number of Groups per Broker", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_coordinator_group_groupmetadatamanager_numgroups{namespace="$ns",pod=~"$broker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=group_base), + ), + G.TimeSeries( + title="Number of Groups per Broker", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{namespace="$ns",pod=~"$broker"})', + legendFormat="stable", + ), + G.Target( + expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{namespace="$ns",pod=~"$broker"})', + legendFormat="preparing_rebalance", + ), + G.Target( + 
expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{namespace="$ns",pod=~"$broker"})', + legendFormat="dead", + ), + G.Target( + expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{namespace="$ns",pod=~"$broker"})', + legendFormat="completing_rebalance", + ), + G.Target( + expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{namespace="$ns",pod=~"$broker"})', + legendFormat="empty", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + stacking={"mode": "normal"}, + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=group_base), + ), +] +group_panels = [ + G.RowPanel( + title="Group Coordinator", + gridPos=G.GridPos(h=1, w=24, x=0, y=group_base), + collapsed=True, + panels=group_inner, + ), +] + +conversion_base = group_base + 1 +conversion_inner = [ + G.TimeSeries( + title="Sum of Produce conversion rate per sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace="$ns",pod=~"$broker"})', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='opsps', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=conversion_base), + ), + G.TimeSeries( + title="Sum of Fetch conversion rate per sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace="$ns",pod=~"$broker"})', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit='opsps', + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=conversion_base), + ), + G.TimeSeries( + title="Sum of Connections per version", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_server_socketservermetrics_connections{namespace="$ns",pod=~"$broker"}) by (client_software_name,client_software_version)', 
+ legendFormat="{{client_software_name}} (v{{client_software_version}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=conversion_base), + ), +] +conversion_panels = [ + G.RowPanel( + title="Message Conversion", + gridPos=G.GridPos(h=1, w=24, x=0, y=conversion_base), + collapsed=True, + panels=conversion_inner, + ), +] + +panels = healthcheck_panels + system_panels + throughput_panels + thread_panels + request_panels + connection_panels + isr_panels + producer_panels + consumer_panels + replication_panels + group_panels + conversion_panels dashboard = G.Dashboard( title="Kafka cluster - v2", diff --git a/grafana-dashboards/zookeeper-cluster.json b/grafana-dashboards/zookeeper-cluster.json index 10bd026c..08dc89d5 100644 --- a/grafana-dashboards/zookeeper-cluster.json +++ b/grafana-dashboards/zookeeper-cluster.json @@ -1185,7 +1185,557 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Latency", + "title": "Server Latency", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$ns\",quantile=~\"$quantile\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace=\"$ns\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sync Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace=\"$ns\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Expired Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + 
"displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace=\"$ns\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Disconnected Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace=\"$ns\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Auth Failures on Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Latency (Kafka)", "transformations": [], "transparent": false, "type": "row" @@ -1229,6 +1779,33 @@ "tagsQuery": null, "type": "query", "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$ns\"}, quantile)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false } ] }, diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py index 440240b2..26695eb5 100644 --- a/grafana-dashboards/zookeeper-cluster.py +++ b/grafana-dashboards/zookeeper-cluster.py @@ -12,6 +12,12 @@ dataSource="Prometheus", query="label_values(namespace)", ), + G.Template( + name="quantile", + label="Quantile", + dataSource="Prometheus", + query='label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace="$ns"}, quantile)', + ), ] ) @@ -145,7 +151,7 @@ # TODO: validate if latency metrics make sense. # Values are high-watermark of the metric and multiplied by tick-time to represent milliseconds. 
-latency = [ +latency_inner = [ G.TimeSeries( title="ZK: Request Latency (Minimum)", dataSource="${DS_PROMETHEUS}", @@ -191,14 +197,100 @@ ] latency_panels = [ G.RowPanel( - title="Latency", + title="Server Latency", gridPos=G.GridPos(h=1, w=24, x=0, y=2), collapsed=True, - panels=latency, + panels=latency_inner, + ), +] + +kafka_base = 2 + 1; +kafka_inner = [ + G.TimeSeries( + title="Kafka: Request Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace="$ns",quantile=~"$quantile"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=kafka_base), + ), + G.TimeSeries( + title="Kafka: Sync Connections/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace="$ns"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=kafka_base), + ), + G.TimeSeries( + title="Kafka: Expired Connections/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace="$ns"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=kafka_base), + ), + G.TimeSeries( + title="Kafka: Disconnected Connections/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace="$ns"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, 
+ gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=kafka_base+1), + ), + G.TimeSeries( + title="Kafka: Auth Failures on Connections/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace="$ns"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=kafka_base+1), + ), +] +kafka_panels = [ + G.RowPanel( + title="Client Latency (Kafka)", + gridPos=G.GridPos(h=1, w=24, x=0, y=kafka_base), + collapsed=True, + panels=kafka_inner, ), ] -panels = healthcheck_panels + system_panels + latency_panels +panels = healthcheck_panels + system_panels + latency_panels + kafka_panels dashboard = G.Dashboard( title="Zookeeper cluster - v2", From 2b9b500998aa174b33b85912b80c939b0891a0b1 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Thu, 30 Jun 2022 13:35:36 +0100 Subject: [PATCH 14/28] feat: kafka topics --- grafana-dashboards/Makefile | 1 + grafana-dashboards/kafka-cluster.json | 4 +- grafana-dashboards/kafka-cluster.py | 146 +-- grafana-dashboards/kafka-topics.json | 1085 +++++++++++++++++++++++ grafana-dashboards/kafka-topics.py | 249 ++++++ grafana-dashboards/zookeeper-cluster.py | 10 +- 6 files changed, 1430 insertions(+), 65 deletions(-) create mode 100644 grafana-dashboards/kafka-topics.json create mode 100644 grafana-dashboards/kafka-topics.py diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index 4bf86dd0..b937e9f4 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -2,3 +2,4 @@ all: generate-dashboard -o confluent-platform.json confluent-platform.py generate-dashboard -o zookeeper-cluster.json zookeeper-cluster.py generate-dashboard -o kafka-cluster.json kafka-cluster.py + generate-dashboard -o kafka-topics.json kafka-topics.py diff --git 
a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json index 8d603f44..25e12780 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/kafka-cluster.json @@ -372,7 +372,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{namespace=\"$ns\"})", + "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{namespace=\"$ns\"})", "format": "time_series", "hide": false, "instant": false, @@ -387,7 +387,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Unclean leader elections", + "title": "Kafka: Sum of Topics", "transformations": [], "transparent": false, "type": "stat" diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index 0902e3f5..3c5e0f31 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -20,12 +20,12 @@ multi=True, includeAll=True, ), - G.Template( + G.Template( name="quantile", label="Quantile", dataSource="Prometheus", query='label_values(kafka_network_requestmetrics_requestqueuetimems{namespace="$ns"}, quantile)', - ), + ), ] ) @@ -79,11 +79,11 @@ gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 2, y=0), ), G.Stat( - title="Kafka: Sum of Unclean leader elections", + title="Kafka: Sum of Topics", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{namespace="$ns"})', + expr='sum(kafka_controller_kafkacontroller_globaltopiccount{namespace="$ns"})', ), ], reduceCalc="last", @@ -124,7 +124,6 @@ format="bytes", gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 5, y=0), ), - G.Stat( title="Kafka: Sum of Partitions", dataSource="${DS_PROMETHEUS}", @@ -449,7 +448,7 @@ gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=connection_base), ), # By Listener - G.TimeSeries( + G.TimeSeries( title="Sum of Connections alive per Listener", dataSource="${DS_PROMETHEUS}", 
targets=[ @@ -460,7 +459,9 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=connection_base + 1), + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=connection_base + 1 + ), ), G.TimeSeries( title="Sum of Connections creation rate per Listener", @@ -473,7 +474,9 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=connection_base + 1), + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=connection_base + 1 + ), ), G.TimeSeries( title="Sum of Connections close rate per Listener", @@ -486,7 +489,9 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=connection_base + 1), + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=connection_base + 1 + ), ), ] connection_panels = [ @@ -539,7 +544,7 @@ producer_base = isr_base + 1 producer_inner = [ G.TimeSeries( - title="Produce: Request Queue Time", + title="Produce: Request Queue Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -549,11 +554,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=producer_base), ), G.TimeSeries( - title="Produce: Local Time", + title="Produce: Local Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -563,11 +568,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=producer_base), ), G.TimeSeries( - title="Produce: Remote Time", + title="Produce: Remote Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -577,11 +582,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, 
x=tsWidth * 2, y=producer_base), ), G.TimeSeries( - title="Produce: Response Queue Time", + title="Produce: Response Queue Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -591,11 +596,13 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=producer_base + 1), + unit="ms", + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=producer_base + 1 + ), ), G.TimeSeries( - title="Produce: Response Send Time", + title="Produce: Response Send Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -605,13 +612,15 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=producer_base + 1), + unit="ms", + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=producer_base + 1 + ), ), ] producer_panels = [ G.RowPanel( - title="Request latency: Producer", + title="Request latency: Producer", gridPos=G.GridPos(h=1, w=24, x=0, y=producer_base), collapsed=True, panels=producer_inner, @@ -621,7 +630,7 @@ consumer_base = producer_base + 2 consumer_inner = [ G.TimeSeries( - title="Fetch: Request Queue Time", + title="Fetch: Request Queue Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -631,11 +640,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=consumer_base), ), G.TimeSeries( - title="Fetch: Local Time", + title="Fetch: Local Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -645,11 +654,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=consumer_base), ), G.TimeSeries( - title="Fetch: Remote Time", + title="Fetch: Remote Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -659,11 +668,11 @@ ], 
legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=consumer_base), ), G.TimeSeries( - title="Fetch: Response Queue Time", + title="Fetch: Response Queue Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -673,11 +682,13 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=consumer_base + 1), + unit="ms", + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=consumer_base + 1 + ), ), G.TimeSeries( - title="Fetch: Response Send Time", + title="Fetch: Response Send Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -687,13 +698,15 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=consumer_base + 1), + unit="ms", + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=consumer_base + 1 + ), ), ] consumer_panels = [ G.RowPanel( - title="Request latency: Consumer Fetch", + title="Request latency: Consumer Fetch", gridPos=G.GridPos(h=1, w=24, x=0, y=consumer_base), collapsed=True, panels=consumer_inner, @@ -703,7 +716,7 @@ replication_base = consumer_base + 2 replication_inner = [ G.TimeSeries( - title="Fetch: Request Queue Time", + title="Fetch: Request Queue Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -713,11 +726,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=replication_base), ), G.TimeSeries( - title="Fetch: Local Time", + title="Fetch: Local Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -727,11 +740,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=replication_base), ), G.TimeSeries( 
- title="Fetch: Remote Time", + title="Fetch: Remote Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -741,11 +754,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', + unit="ms", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=replication_base), ), G.TimeSeries( - title="Fetch: Response Queue Time", + title="Fetch: Response Queue Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -755,11 +768,13 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=replication_base + 1), + unit="ms", + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=replication_base + 1 + ), ), G.TimeSeries( - title="Fetch: Response Send Time", + title="Fetch: Response Send Time", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -769,13 +784,15 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='ms', - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=replication_base + 1), + unit="ms", + gridPos=G.GridPos( + h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=replication_base + 1 + ), ), ] replication_panels = [ G.RowPanel( - title="Request latency: Replica Fetch", + title="Request latency: Replica Fetch", gridPos=G.GridPos(h=1, w=24, x=0, y=replication_base), collapsed=True, panels=replication_inner, @@ -785,7 +802,7 @@ group_base = replication_base + 2 group_inner = [ G.TimeSeries( - title="Number of Groups per Broker", + title="Number of Groups per Broker", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -798,7 +815,7 @@ gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=group_base), ), G.TimeSeries( - title="Number of Groups per Broker", + title="Number of Groups per Broker", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -830,7 +847,7 @@ ] group_panels = [ G.RowPanel( - title="Group Coordinator", + title="Group Coordinator", gridPos=G.GridPos(h=1, w=24, 
x=0, y=group_base), collapsed=True, panels=group_inner, @@ -840,7 +857,7 @@ conversion_base = group_base + 1 conversion_inner = [ G.TimeSeries( - title="Sum of Produce conversion rate per sec", + title="Sum of Produce conversion rate per sec", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -850,11 +867,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='opsps', + unit="opsps", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=conversion_base), ), G.TimeSeries( - title="Sum of Fetch conversion rate per sec", + title="Sum of Fetch conversion rate per sec", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -864,11 +881,11 @@ ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit='opsps', + unit="opsps", gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=conversion_base), ), G.TimeSeries( - title="Sum of Connections per version", + title="Sum of Connections per version", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -883,14 +900,27 @@ ] conversion_panels = [ G.RowPanel( - title="Message Conversion", + title="Message Conversion", gridPos=G.GridPos(h=1, w=24, x=0, y=conversion_base), collapsed=True, panels=conversion_inner, ), ] -panels = healthcheck_panels + system_panels + throughput_panels + thread_panels + request_panels + connection_panels + isr_panels + producer_panels + consumer_panels + replication_panels + group_panels + conversion_panels +panels = ( + healthcheck_panels + + system_panels + + throughput_panels + + thread_panels + + request_panels + + connection_panels + + isr_panels + + producer_panels + + consumer_panels + + replication_panels + + group_panels + + conversion_panels +) dashboard = G.Dashboard( title="Kafka cluster - v2", diff --git a/grafana-dashboards/kafka-topics.json b/grafana-dashboards/kafka-topics.json new file mode 100644 index 00000000..d241d624 --- /dev/null +++ b/grafana-dashboards/kafka-topics.json @@ -0,0 +1,1085 @@ +{ + "__inputs": [ + { + 
"description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka topics", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], 
+ "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum(kafka_log_log_size{namespace=\"$ns\",topic=~\"$topic\"}) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Log size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum 
without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$ns\", topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": 
"" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Requests/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$ns\", topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer Fetch Requests/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + 
"cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": true + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_log_log_logstartoffset{namespace=\"$ns\",topic=~\"$topic\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Start Offsets", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "app": true, + "clusterId": true, + "confluentPlatform": true, + "confluent_platform": true, + "controller_revision_hash": true, + "instance": true, + "job": true, + "namespace": true, + "platform_confluent_io_type": true, + "statefulset_kubernetes_io_pod_name": true, + "type": true + 
}, + "indexByName": { + "Value": 4, + "partition": 3, + "pod": 1, + "topic": 2 + }, + "renameByName": { + "Value": "offset" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "partition" + } + ], + "fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition" + } + ] + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": true + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_log_log_logendoffset{namespace=\"$ns\",topic=~\"$topic\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "End Offsets", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "app": true, + "clusterId": true, + "confluentPlatform": true, + "confluent_platform": true, + "controller_revision_hash": true, + "instance": true, + "job": true, + "namespace": 
true, + "platform_confluent_io_type": true, + "statefulset_kubernetes_io_pod_name": true, + "type": true + }, + "indexByName": { + "Value": 4, + "partition": 3, + "pod": 1, + "topic": 2 + }, + "renameByName": { + "Value": "offset" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "partition" + } + ], + "fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition" + } + ] + } + } + ], + "transparent": false, + "type": "table" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Offsets", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Topic", + "multi": true, + "name": "topic", + "options": [], + "query": "label_values(kafka_log_log_size{namespace=\"$ns\"}, topic)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": 
null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka topics - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/kafka-topics.py b/grafana-dashboards/kafka-topics.py new file mode 100644 index 00000000..2fc819c2 --- /dev/null +++ b/grafana-dashboards/kafka-topics.py @@ -0,0 +1,249 @@ +import grafanalib.core as G + +defaultHeight = 10 +tsWidth = 12 +tableWidth = 12 + +templating = G.Templating( + list=[ + G.Template( + name="ns", + label="Namespace", + dataSource="Prometheus", + query="label_values(namespace)", + ), + G.Template( + name="topic", + label="Topic", + dataSource="Prometheus", + query='label_values(kafka_log_log_size{namespace="$ns"}, topic)', + multi=True, + includeAll=True, + ), + ] +) + +topk = "10" + +throughput_base = 0 +throughput_layers = 3 +throughput_panels = [ + G.RowPanel( + title="Throughput", + gridPos=G.GridPos(h=1, w=24, x=0, y=throughput_base), + ), + G.TimeSeries( + title="Messages In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~"$topic",namespace="$ns"}[5m])))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + stacking={"mode": "normal"}, + gridPos=G.GridPos(h=defaultHeight, w=tsWidth, x=tsWidth * 0, y=throughput_base), + ), + G.TimeSeries( + title="Log size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', sum(kafka_log_log_size{namespace="$ns",topic=~"$topic"}) by 
(topic))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + stacking={"mode": "normal"}, + gridPos=G.GridPos(h=defaultHeight, w=tsWidth, x=tsWidth * 1, y=throughput_base), + ), + G.TimeSeries( + title="Bytes In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~"$topic",namespace="$ns"}[5m])))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=defaultHeight, w=tsWidth, x=tsWidth * 0, y=throughput_base + 1 + ), + ), + G.TimeSeries( + title="Bytes Out/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~"$topic",namespace="$ns"}[5m])))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=defaultHeight, w=tsWidth, x=tsWidth * 1, y=throughput_base + 1 + ), + ), + G.TimeSeries( + title="Produce Requests/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace="$ns", topic=~"$topic"}[5m])) by (topic))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=defaultHeight, w=tsWidth, x=tsWidth * 0, y=throughput_base + 2 + ), + ), + G.TimeSeries( + title="Consumer Fetch Requests/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', 
sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace="$ns", topic=~"$topic"}[5m])) by (topic))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=defaultHeight, w=tsWidth, x=tsWidth * 1, y=throughput_base + 2 + ), + ), +] + + +offsets_txs = [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": True, + "__name__": True, + "app": True, + "confluent_platform": True, + "controller_revision_hash": True, + "job": True, + "clusterId": True, + "confluentPlatform": True, + "instance": True, + "namespace": True, + "platform_confluent_io_type": True, + "statefulset_kubernetes_io_pod_name": True, + "type": True, + }, + "indexByName": { + "pod": 1, + "topic": 2, + "partition": 3, + "Value": 4, + }, + "renameByName": {"Value": "offset"}, + }, + }, + { + "id": "convertFieldType", + "options": { + "conversions": [{"destinationType": "number", "targetField": "partition"}], + "fields": {}, + }, + }, + {"id": "sortBy", "options": {"fields": {}, "sort": [{"field": "topic"}]}}, + { + "id": "sortBy", + "options": {"fields": {}, "sort": [{"field": "partition"}]}, + }, +] + +offsets_base = throughput_base + throughput_layers +offsets_inner = [ + G.Table( + title="Start Offsets", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_log_log_logstartoffset{namespace="$ns",topic=~"$topic"}', + legendFormat="{{topic}}", + format="table", + instant=True, + ), + ], + filterable=True, + transformations=offsets_txs, + gridPos=G.GridPos(h=defaultHeight, w=tableWidth, x=tableWidth * 0, y=offsets_base), + ), + G.Table( + title="End Offsets", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_log_log_logendoffset{namespace="$ns",topic=~"$topic"}', + legendFormat="{{topic}}", + format="table", + instant=True, + ), + ], + filterable=True, + transformations=offsets_txs, + 
gridPos=G.GridPos(h=defaultHeight, w=tableWidth, x=tableWidth * 1, y=offsets_base), + ), +] +offsets_panels = [ + G.RowPanel( + title="Offsets", + gridPos=G.GridPos(h=1, w=24, x=0, y=offsets_base), + collapsed=True, + panels=offsets_inner, + ), +] + +panels = throughput_panels + offsets_panels +dashboard = G.Dashboard( + title="Kafka topics - v2", + description="Overview of the Kafka topics", + tags=["confluent", "kafka"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", +).auto_panel_ids() diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py index 26695eb5..7eaac84b 100644 --- a/grafana-dashboards/zookeeper-cluster.py +++ b/grafana-dashboards/zookeeper-cluster.py @@ -12,12 +12,12 @@ dataSource="Prometheus", query="label_values(namespace)", ), - G.Template( + G.Template( name="quantile", label="Quantile", dataSource="Prometheus", query='label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace="$ns"}, quantile)', - ), + ), ] ) @@ -204,7 +204,7 @@ ), ] -kafka_base = 2 + 1; +kafka_base = 2 + 1 kafka_inner = [ G.TimeSeries( title="Kafka: Request Latency", @@ -263,7 +263,7 @@ legendCalcs=["max", "mean", "last"], unit="ms", stacking={"mode": "normal"}, - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=kafka_base+1), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=kafka_base + 1), ), G.TimeSeries( title="Kafka: Auth Failures on Connections/sec", @@ -278,7 +278,7 @@ legendCalcs=["max", "mean", "last"], unit="ms", stacking={"mode": "normal"}, - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=kafka_base+1), + gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=kafka_base + 1), ), ] kafka_panels = [ From d46f881b26431604648251ee57116e2b2936226a Mon Sep 17 00:00:00 2001 From: Jorge 
Esteban Quilcate Otoya Date: Thu, 30 Jun 2022 15:08:09 +0100 Subject: [PATCH 15/28] feat: sr panels --- grafana-dashboards/Makefile | 1 + grafana-dashboards/confluent-platform.json | 8 +- grafana-dashboards/confluent-platform.py | 8 +- grafana-dashboards/kafka-cluster.json | 6 +- grafana-dashboards/kafka-cluster.py | 6 +- .../schema-registry-cluster.json | 910 ++++++++++++++++++ grafana-dashboards/schema-registry-cluster.py | 169 ++++ grafana-dashboards/zookeeper-cluster.json | 6 +- grafana-dashboards/zookeeper-cluster.py | 6 +- 9 files changed, 1100 insertions(+), 20 deletions(-) create mode 100644 grafana-dashboards/schema-registry-cluster.json create mode 100644 grafana-dashboards/schema-registry-cluster.py diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index b937e9f4..a547c8c1 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -3,3 +3,4 @@ all: generate-dashboard -o zookeeper-cluster.json zookeeper-cluster.py generate-dashboard -o kafka-cluster.json kafka-cluster.py generate-dashboard -o kafka-topics.json kafka-topics.py + generate-dashboard -o schema-registry-cluster.json schema-registry-cluster.py diff --git a/grafana-dashboards/confluent-platform.json b/grafana-dashboards/confluent-platform.json index b58498b5..e7c81d2d 100644 --- a/grafana-dashboards/confluent-platform.json +++ b/grafana-dashboards/confluent-platform.json @@ -1273,7 +1273,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "expr": "avg without(schema_type) (kafka_schema_registry_registered_count{namespace=\"$ns\"})", "format": "time_series", "hide": false, "instant": false, @@ -1288,7 +1288,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Registered Schemas", + "title": "SR: Registered Schemas", "transformations": [], "transparent": false, "type": "stat" @@ -1356,7 +1356,7 @@ "targets": [ { "datasource": null, - "expr": 
"sum(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"})", + "expr": "avg without(schema_type) (kafka_schema_registry_schemas_deleted{namespace=\"$ns\"})", "format": "time_series", "hide": false, "instant": false, @@ -1371,7 +1371,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Deleted Schemas", + "title": "SR: Deleted Schemas", "transformations": [], "transparent": false, "type": "stat" diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index 4b964402..90d954cf 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -225,11 +225,11 @@ gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=2), ), G.Stat( - title="SR: Sum of Registered Schemas", + title="SR: Registered Schemas", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum(kafka_schema_registry_registered_count{namespace="$ns"})', + expr='avg without(schema_type) (kafka_schema_registry_registered_count{namespace="$ns"})', ), ], reduceCalc="last", @@ -239,11 +239,11 @@ gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=2), ), G.Stat( - title="SR: Sum of Deleted Schemas", + title="SR: Deleted Schemas", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum(kafka_schema_registry_schemas_deleted{namespace="$ns"})', + expr='avg without(schema_type) (kafka_schema_registry_schemas_deleted{namespace="$ns"})', ), ], reduceCalc="last", diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/kafka-cluster.json index 25e12780..240803ed 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/kafka-cluster.json @@ -1216,7 +1216,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: CPU usage", + "title": "CPU usage", "transformations": [], "transparent": false, "type": "timeseries" @@ -1316,7 +1316,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Memory usage", + "title": "Memory usage", 
"transformations": [], "transparent": false, "type": "timeseries" @@ -1416,7 +1416,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: GC collection", + "title": "GC collection", "transformations": [], "transparent": false, "type": "timeseries" diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index 3c5e0f31..8c87fef5 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -223,7 +223,7 @@ gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), ), G.TimeSeries( - title="Kafka: CPU usage", + title="CPU usage", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -237,7 +237,7 @@ gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=system_base), ), G.TimeSeries( - title="Kafka: Memory usage", + title="Memory usage", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -251,7 +251,7 @@ gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=system_base), ), G.TimeSeries( - title="Kafka: GC collection", + title="GC collection", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( diff --git a/grafana-dashboards/schema-registry-cluster.json b/grafana-dashboards/schema-registry-cluster.json new file mode 100644 index 00000000..d090a584 --- /dev/null +++ b/grafana-dashboards/schema-registry-cluster.json @@ -0,0 +1,910 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Schema Registry cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, 
+ "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Health-check", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Online instances", + "transformations": [], + 
"transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$ns\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Registered Schemas by Type", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$ns\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Created Schemas", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"sum(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Deleted Schemas", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Active Connections", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + 
"error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",type=\"schemaregistry\"}[5m])", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",type=\"schemaregistry\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": 
false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",type=\"schemaregistry\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "schema-registry" + ], + "templating": { + "list": [ + { + 
"allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Schema Registry cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py new file mode 100644 index 00000000..cec8e70e --- /dev/null +++ b/grafana-dashboards/schema-registry-cluster.py @@ -0,0 +1,169 @@ +import grafanalib.core as G + +defaultHeight = 5 +statWidth = 4 +tsWidth = 8 + +templating = G.Templating( + list=[ + G.Template( + name="ns", + label="Namespace", + dataSource="Prometheus", + query="label_values(namespace)", + ), + ] +) + +healthcheck_base=0 +healthcheck_panels=[ + G.RowPanel( + title="Health-check", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="SR: Online instances", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(kafka_schema_registry_registered_count{namespace="$ns"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=2.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=healthcheck_base), + ), + G.Stat( + title="SR: Sum of 
Registered Schemas by Type", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(kafka_schema_registry_registered_count{namespace="$ns"}) by (schema_type)', + instant=True, + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=healthcheck_base), + ), + G.Stat( + title="SR: Sum of Created Schemas", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(kafka_schema_registry_schemas_created{namespace="$ns"}) by (schema_type)', + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=healthcheck_base), + ), + G.Stat( + title="SR: Sum of Deleted Schemas", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_schema_registry_schemas_deleted{namespace="$ns"}) by (schema_type)', + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=healthcheck_base), + ), + G.Stat( + title="SR: Sum of Active Connections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=healthcheck_base), + ), + +] + +system_panels=[ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.TimeSeries( + title="CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='irate(process_cpu_seconds_total{namespace="$ns",type="schemaregistry"}[5m])', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + 
unit="percentunit", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=1), + ), + G.TimeSeries( + title="Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(area)(jvm_memory_bytes_used{namespace="$ns",type="schemaregistry"})', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=1), + ), + G.TimeSeries( + title="GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",type="schemaregistry"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=1), + ), +] + +panels = healthcheck_panels + system_panels + +dashboard = G.Dashboard( + title="Schema Registry cluster - v2", + description="Overview of the Schema Registry cluster", + tags=["confluent", "schema-registry"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", +).auto_panel_ids() diff --git a/grafana-dashboards/zookeeper-cluster.json b/grafana-dashboards/zookeeper-cluster.json index 08dc89d5..e9d5bd1a 100644 --- a/grafana-dashboards/zookeeper-cluster.json +++ b/grafana-dashboards/zookeeper-cluster.json @@ -643,7 +643,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: CPU usage", + "title": "CPU usage", "transformations": [], "transparent": false, "type": "timeseries" @@ -743,7 +743,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: Memory usage", + "title": "Memory usage", "transformations": [], "transparent": false, "type": "timeseries" @@ -843,7 +843,7 @@ ], "timeFrom": null, "timeShift": null, - 
"title": "ZK: GC collection", + "title": "GC collection", "transformations": [], "transparent": false, "type": "timeseries" diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py index 7eaac84b..2e60aad1 100644 --- a/grafana-dashboards/zookeeper-cluster.py +++ b/grafana-dashboards/zookeeper-cluster.py @@ -106,7 +106,7 @@ gridPos=G.GridPos(h=1, w=24, x=0, y=1), ), G.TimeSeries( - title="ZK: CPU usage", + title="CPU usage", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -120,7 +120,7 @@ gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=1), ), G.TimeSeries( - title="ZK: Memory usage", + title="Memory usage", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -134,7 +134,7 @@ gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=1), ), G.TimeSeries( - title="ZK: GC collection", + title="GC collection", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( From 91579d5b21479c487e25c1467b75fcb61088e110 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Thu, 30 Jun 2022 19:27:08 +0100 Subject: [PATCH 16/28] latest updates --- grafana-dashboards/Makefile | 1 + grafana-dashboards/kafka-connect-cluster.json | 1082 +++++++++++++++++ grafana-dashboards/kafka-connect-cluster.py | 206 ++++ 3 files changed, 1289 insertions(+) create mode 100644 grafana-dashboards/kafka-connect-cluster.json create mode 100644 grafana-dashboards/kafka-connect-cluster.py diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index a547c8c1..f8bb8246 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -4,3 +4,4 @@ all: generate-dashboard -o kafka-cluster.json kafka-cluster.py generate-dashboard -o kafka-topics.json kafka-topics.py generate-dashboard -o schema-registry-cluster.json schema-registry-cluster.py + generate-dashboard -o kafka-connect-cluster.json kafka-connect-cluster.py diff --git a/grafana-dashboards/kafka-connect-cluster.json 
b/grafana-dashboards/kafka-connect-cluster.json new file mode 100644 index 00000000..54090197 --- /dev/null +++ b/grafana-dashboards/kafka-connect-cluster.json @@ -0,0 +1,1082 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka Connect cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Health-check", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": 
"auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_app_info{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" 
+ } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + 
"mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=~\"$connect_app\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + 
"w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" 
+ ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum 
without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-connect" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Connect group", + "multi": false, + "name": "connect_app", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\"}, app)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Connect worker", + "multi": true, + "name": 
"connect_worker", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=\"$connect_app\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Connector", + "multi": false, + "name": "connector", + "options": [], + "query": "label_values(kafka_connect_connector_task_metrics_pause_ratio{namespace=\"$ns\",app=\"$connector_app\"}, connector)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Connect cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/kafka-connect-cluster.py b/grafana-dashboards/kafka-connect-cluster.py new file mode 100644 index 00000000..5398fc8a --- /dev/null +++ b/grafana-dashboards/kafka-connect-cluster.py @@ -0,0 +1,206 @@ +import grafanalib.core as G + +defaultHeight = 5 +statWidth = 4 +tsWidth = 8 + +templating = G.Templating( + list=[ + G.Template( + name="ns", + label="Namespace", + dataSource="Prometheus", + query="label_values(namespace)", + ), + G.Template( + name="connect_app", + label="Connect group", + dataSource="Prometheus", + query='label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns"}, app)', + ), + G.Template( + 
name="connect_worker", + label="Connect worker", + dataSource="Prometheus", + query='label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app="$connect_app"}, pod)', + multi=True, + includeAll=True, + ), + G.Template( + name="connector", + label="Connector", + dataSource="Prometheus", + query='label_values(kafka_connect_connector_task_metrics_pause_ratio{namespace="$ns",app="$connector_app"}, connector)', + ), + ] +) + +hc_base=0 +hc_panels = [ + G.RowPanel( + title="Health-check", + gridPos=G.GridPos(h=1, w=24, x=0, y=hc_base), + ), + G.Stat( + title="Connect: Online Workers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(kafka_connect_app_info{namespace="$ns",app="$connect_app"})', + legendFormat='{{version}}' + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=hc_base), + ), + G.Stat( + title="Connect: Sum of Total Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app="$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=hc_base), + ), + G.Stat( + title="Connect: Sum of Running Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace="$ns",app="$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=hc_base), + ), + G.Stat( + title="Connect: Sum of Paused Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace="$ns",app="$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=hc_base), + ), + G.Stat( + title="Connect: Sum of Failed Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace="$ns",app="$connect_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=hc_base), + ), + G.Stat( + title="Connect: Time since last rebalance", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace="$ns",app=~"$connect_app"} >= 0', + legendFormat="{{pod}}", + ), + ], + reduceCalc="last", + format="clockms", + graphMode="none", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=hc_base), + ), +] + +system_base=hc_base + 1; +system_panels=[ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='irate(process_cpu_seconds_total{namespace="$ns",app="$connect_app",type="connect"}[5m])', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=system_base), + ), + G.TimeSeries( + title="Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum 
without(area)(jvm_memory_bytes_used{namespace="$ns",app="$connect_app",type="connect"})', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=system_base), + ), + G.TimeSeries( + title="GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",app="$connect_app",type="connect"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=system_base), + ), +] + +panels = hc_panels + system_panels + +dashboard = G.Dashboard( + title="Kafka Connect cluster - v2", + description="Overview of the Kafka Connect cluster", + tags=["confluent", "kafka-connect"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", +).auto_panel_ids() From f416896753218cae369066b73aefe809f40ea4df Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Fri, 8 Jul 2022 13:23:21 +0100 Subject: [PATCH 17/28] feat: connect cluster dashboard --- grafana-dashboards/confluent-platform.json | 1881 +++++----- grafana-dashboards/confluent-platform.py | 55 +- grafana-dashboards/kafka-connect-cluster.json | 3278 ++++++++++++++++- grafana-dashboards/kafka-connect-cluster.py | 635 +++- grafana-dashboards/kafka-topics.py | 8 +- .../schema-registry-cluster.json | 8 +- grafana-dashboards/schema-registry-cluster.py | 37 +- 7 files changed, 4879 insertions(+), 1023 deletions(-) diff --git a/grafana-dashboards/confluent-platform.json b/grafana-dashboards/confluent-platform.json index e7c81d2d..41710d5e 100644 --- a/grafana-dashboards/confluent-platform.json +++ 
b/grafana-dashboards/confluent-platform.json @@ -1273,10 +1273,10 @@ "targets": [ { "datasource": null, - "expr": "avg without(schema_type) (kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$ns\"})", "format": "time_series", "hide": false, - "instant": false, + "instant": true, "interval": "", "intervalFactor": 2, "legendFormat": "", @@ -1288,7 +1288,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Registered Schemas", + "title": "SR: Sum of Registered Schemas", "transformations": [], "transparent": false, "type": "stat" @@ -1356,13 +1356,13 @@ "targets": [ { "datasource": null, - "expr": "avg without(schema_type) (kafka_schema_registry_schemas_deleted{namespace=\"$ns\"})", + "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$ns\"}) by (schema_type)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{schema_type}}", "metric": "", "refId": "", "step": 10, @@ -1371,52 +1371,11 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Deleted Schemas", + "title": "SR: Sum of Created Schemas by Type", "transformations": [], "transparent": false, "type": "stat" }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": "connect_app", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Kafka Connect cluster: $connect_app", - "transformations": [], - "transparent": false, - "type": "row" - }, { 
"cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", @@ -1449,12 +1408,12 @@ "gridPos": { "h": 5, "w": 4, - "x": 0, - "y": 3 + "x": 12, + "y": 2 }, "height": null, "hideTimeOverride": false, - "id": 19, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, @@ -1480,13 +1439,13 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"}) by (schema_type)", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "", + "legendFormat": "{{schema_type}}", "metric": "", "refId": "", "step": 10, @@ -1495,929 +1454,1055 @@ ], "timeFrom": null, "timeShift": null, - "title": "Connect: Online Workers", + "title": "SR: Sum of Deleted Schemas by Type", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "collapsed": true, + "datasource": null, "description": null, "editable": true, "error": false, "fieldConfig": { "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 5, - "w": 4, - "x": 4, + "h": 1, + "w": 24, + "x": 0, "y": 3 }, "height": null, "hideTimeOverride": false, - "id": 20, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": 
"none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + 
"transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=~\"$connect_app\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" } ], + "repeat": "connect_app", + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Connect: Sum of Total Tasks", + "title": "Kafka Connect cluster: $connect_app", "transformations": [], "transparent": false, - "type": "stat" + "type": "row" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "collapsed": true, + "datasource": null, "description": null, "editable": true, "error": false, 
"fieldConfig": { "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 3 + "h": 1, + "w": 24, + "x": 0, + "y": 4 }, "height": null, "hideTimeOverride": false, - "id": 21, + "id": 26, "interval": null, "links": [], "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + 
"hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online Servers", + "transformations": [], + "transparent": false, + "type": "stat" }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Running Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - 
"span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Paused Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - 
"title": "Connect: Sum of Failed Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=~\"$connect_app\"} >= 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Time since last rebalance", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - 
"maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": "ksqldb_app", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB cluster: $ksqldb_app", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Online Servers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", 
- "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Active Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + 
} + ] }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] + "unit": "none" + }, + "overrides": [] }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - 
"step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Running Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] + "unit": "none" + }, + "overrides": [] }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + 
"reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, + "type": "stat" }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Rebalancing Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 
0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] + "unit": "none" + }, + "overrides": [] }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { - "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", 
app=\"$ksqldb_app\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Queries", + "transformations": [], + "transparent": false, + "type": "stat" } ], + "repeat": "ksqldb_app", + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Connect: Sum of Failed Queries", + "title": "ksqlDB cluster: $ksqldb_app", "transformations": [], 
"transparent": false, - "type": "stat" + "type": "row" } ], "refresh": "30s", diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index 90d954cf..15ec7b2e 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -225,11 +225,12 @@ gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=2), ), G.Stat( - title="SR: Registered Schemas", + title="SR: Sum of Registered Schemas", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='avg without(schema_type) (kafka_schema_registry_registered_count{namespace="$ns"})', + expr='avg(kafka_schema_registry_registered_count{namespace="$ns"})', + instant=True, ), ], reduceCalc="last", @@ -239,11 +240,12 @@ gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=2), ), G.Stat( - title="SR: Deleted Schemas", + title="SR: Sum of Created Schemas by Type", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='avg without(schema_type) (kafka_schema_registry_schemas_deleted{namespace="$ns"})', + expr='avg(kafka_schema_registry_schemas_created{namespace="$ns"}) by (schema_type)', + legendFormat="{{schema_type}}", ), ], reduceCalc="last", @@ -252,14 +254,24 @@ ], gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=2), ), + G.Stat( + title="SR: Sum of Deleted Schemas by Type", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_schema_registry_schemas_deleted{namespace="$ns"}) by (schema_type)', + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=2), + ), ] -connect_panels = [ - G.RowPanel( - title="Kafka Connect cluster: $connect_app", - gridPos=G.GridPos(h=1, w=24, x=0, y=3), - repeat=G.Repeat(variable="connect_app"), - ), +connect_inner = [ G.Stat( title="Connect: Online Workers", dataSource="${DS_PROMETHEUS}", @@ -352,12 +364,17 
@@ ), ] -ksqldb_panels = [ +connect_panels = [ G.RowPanel( - title="ksqlDB cluster: $ksqldb_app", - gridPos=G.GridPos(h=1, w=24, x=0, y=4), - repeat=G.Repeat(variable="ksqldb_app"), + title="Kafka Connect cluster: $connect_app", + gridPos=G.GridPos(h=1, w=24, x=0, y=3), + repeat=G.Repeat(variable="connect_app"), + collapsed=True, + panels=connect_inner, ), +] + +ksqldb_inner = [ G.Stat( title="ksqlDB: Online Servers", dataSource="${DS_PROMETHEUS}", @@ -433,6 +450,16 @@ ), ] +ksqldb_panels = [ + G.RowPanel( + title="ksqlDB cluster: $ksqldb_app", + gridPos=G.GridPos(h=1, w=24, x=0, y=4), + repeat=G.Repeat(variable="ksqldb_app"), + collapsed=True, + panels=ksqldb_inner, + ), +] + panels = zk_panels + kafka_panels + sr_panels + connect_panels + ksqldb_panels dashboard = G.Dashboard( diff --git a/grafana-dashboards/kafka-connect-cluster.json b/grafana-dashboards/kafka-connect-cluster.json index 54090197..0ccbb907 100644 --- a/grafana-dashboards/kafka-connect-cluster.json +++ b/grafana-dashboards/kafka-connect-cluster.json @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_connect_app_info{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "count(kafka_connect_app_info{namespace=\"$ns\",app=\"$connect_app\",version!=\"\"})", "format": "time_series", "hide": false, "instant": false, @@ -562,7 +562,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=~\"$connect_app\"} >= 0", + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=\"$connect_app\"} >= 0", "format": "time_series", "hide": false, "instant": false, @@ -584,21 +584,31 @@ }, { "cacheTimeout": null, - "collapsed": false, - "datasource": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, "fieldConfig": { "defaults": { + "custom": { + 
"align": "auto", + "displayMode": "auto", + "filterable": false + }, "thresholds": { "mode": "absolute", "steps": [] } - } + }, + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 1, + "h": 5, "w": 24, "x": 0, "y": 1 @@ -608,108 +618,314 @@ "id": 8, "interval": null, "links": [], + "mappings": [], "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, - "panels": [], + "options": { + "showHeader": true + }, "repeat": null, "repeatDirection": null, - "span": null, - "targets": [], + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_app_info{namespace=\"$ns\",app=\"$connect_app\",start_time_ms!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_app_info{namespace=\"$ns\",app=\"$connect_app\",version!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", 
+ "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], "timeFrom": null, "timeShift": null, - "title": "System", - "transformations": [], + "title": "Connect Workers", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "pod" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "pod", + "app 1", + "start_time_ms", + "version", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + "namespace 1" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "app 1": 1, + "namespace 1": 0, + "pod": 2, + "start_time_ms": 3, + "version": 4 + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. 
failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + "app 1": "cluster", + "namespace 1": "namespace", + "pod": "worker", + "start_time_ms": "start time", + "version": "version" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "start_time_ms" + } + ], + "fields": {} + } + } + ], "transparent": false, - "type": "row" + "type": "table" }, { "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } + "align": "auto", + "displayMode": "auto", + "filterable": false }, - "mappings": [], "thresholds": { "mode": "absolute", "steps": [] - }, - "unit": "percentunit" + } }, "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 10, - "w": 8, + "h": 5, + "w": 24, "x": 0, - "y": 1 + "y": 2 }, "height": null, "hideTimeOverride": false, "id": 9, "interval": null, "links": [], + "mappings": [], "maxDataPoints": 100, "maxPerRow": null, "minSpan": null, "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + "showHeader": true }, "repeat": null, "repeatDirection": null, - "span": null, + "span": 6, "targets": [ { "datasource": null, - "expr": 
"irate(process_cpu_seconds_total{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"}[5m])", - "format": "time_series", + "expr": "kafka_connect_connector_info{namespace=\"$ns\",app=\"$connect_app\"}", + "format": "table", "hide": false, - "instant": false, + "instant": true, "interval": "", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", "metric": "", "refId": "", "step": 10, @@ -718,10 +934,43 @@ ], "timeFrom": null, "timeShift": null, - "title": "CPU usage", - "transformations": [], + "title": "Connectors", + "transformations": [ + { + "id": "seriesToColumns", + 
"options": { + "byField": "connector" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + "Value #C", + "Value #D", + "Value #E" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused" + } + } + } + ], "transparent": false, - "type": "timeseries" + "type": "table" }, { "cacheTimeout": null, @@ -765,15 +1014,15 @@ "mode": "absolute", "steps": [] }, - "unit": "bytes" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { "h": 10, - "w": 8, - "x": 8, - "y": 1 + "w": 12, + "x": 0, + "y": 3 }, "height": null, "hideTimeOverride": false, @@ -803,13 +1052,13 @@ "targets": [ { "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"})", + "expr": "kafka_connect_connector_task_metrics_running_ratio{namespace=\"$ns\",app=\"$connect_app\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{pod}}", + "legendFormat": "{{connector}}", "metric": "", "refId": "", "step": 10, @@ -818,7 +1067,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Memory usage", + "title": "Tasks Running Ratio", "transformations": [], "transparent": false, "type": "timeseries" @@ -865,15 +1114,15 @@ "mode": "absolute", "steps": [] }, - "unit": "percentunit" + "unit": "ms" }, "overrides": [] }, "gridPos": { "h": 10, - "w": 8, - "x": 16, - "y": 1 + "w": 12, + "x": 12, + "y": 3 }, "height": null, "hideTimeOverride": false, @@ -903,7 +1152,348 @@ "targets": [ { "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"}[5m]))", + "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace=\"$ns\",app=\"$connect_app\"}", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + 
"targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -921,7 
+1511,2531 @@ "title": "GC collection", "transformations": [], "transparent": false, - "type": "timeseries" + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_connect_connector_task_metrics_batch_size_avg{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + 
"target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset commit success %", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset commit avg. 
latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + 
"span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_failures{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Record Failures", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_connect_task_error_metrics_total_record_errors{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Record Error", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_records_skipped{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Records Skipped", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_errors_logged{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + 
"timeFrom": null, + "timeShift": null, + "title": "Total Errors Logged", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_retries{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Retries", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Dead Letter Topic Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Task Errors", + "transformations": [], + "transparent": false, + "type": 
"row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Batch Avg. Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + 
"timeShift": null, + "title": "Poll Batch Max. Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Poll Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Write Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Source Tasks", + "transformations": [], + "transparent": false, + 
"type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Batch Avg. Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + 
"timeShift": null, + "title": "Put Batch Max. Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_partition_count{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Partition Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": 
null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Sink Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_connect_connect_metrics_incoming_byte_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_io_ratio{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO Ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + 
}, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_network_io_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network IO Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_connection_count{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Active Connections", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_successful_authentication_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_failed_authentication_total{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Authentications", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connect Workers", + "transformations": [], + "transparent": false, + "type": "row" } ], "refresh": "30s", @@ -976,7 +4090,7 @@ "datasource": "Prometheus", "hide": 
0, "includeAll": false, - "label": "Connect group", + "label": "Connect cluster", "multi": false, "name": "connect_app", "options": [], @@ -1029,12 +4143,12 @@ }, "datasource": "Prometheus", "hide": 0, - "includeAll": false, + "includeAll": true, "label": "Connector", - "multi": false, + "multi": true, "name": "connector", "options": [], - "query": "label_values(kafka_connect_connector_task_metrics_pause_ratio{namespace=\"$ns\",app=\"$connector_app\"}, connector)", + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=\"$connect_app\"}, connector)", "refresh": 1, "regex": null, "sort": 1, diff --git a/grafana-dashboards/kafka-connect-cluster.py b/grafana-dashboards/kafka-connect-cluster.py index 5398fc8a..9887c4af 100644 --- a/grafana-dashboards/kafka-connect-cluster.py +++ b/grafana-dashboards/kafka-connect-cluster.py @@ -14,7 +14,7 @@ ), G.Template( name="connect_app", - label="Connect group", + label="Connect cluster", dataSource="Prometheus", query='label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns"}, app)', ), @@ -30,12 +30,14 @@ name="connector", label="Connector", dataSource="Prometheus", - query='label_values(kafka_connect_connector_task_metrics_pause_ratio{namespace="$ns",app="$connector_app"}, connector)', + query='label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app="$connect_app"}, connector)', + multi=True, + includeAll=True, ), ] ) -hc_base=0 +hc_base = 0 hc_panels = [ G.RowPanel( title="Health-check", @@ -46,8 +48,8 @@ dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='count(kafka_connect_app_info{namespace="$ns",app="$connect_app"})', - legendFormat='{{version}}' + expr='count(kafka_connect_app_info{namespace="$ns",app="$connect_app",version!=""})', + legendFormat="{{version}}", ), ], reduceCalc="last", @@ -121,7 +123,7 @@ dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - 
expr='kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace="$ns",app=~"$connect_app"} >= 0', + expr='kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace="$ns",app="$connect_app"} >= 0', legendFormat="{{pod}}", ), ], @@ -133,10 +135,205 @@ ], gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=hc_base), ), + G.Table( + title="Connect Workers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_app_info{namespace="$ns",app="$connect_app",start_time_ms!=""}', + format="table", + instant=True, + ), + G.Target( + expr='kafka_connect_app_info{namespace="$ns",app="$connect_app",version!=""}', + format="table", + instant=True, + ), + G.Target( + expr='sum by (pod) (kafka_connect_connect_worker_metrics_connector_count{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + G.Target( + expr='sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + G.Target( + expr='sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + G.Target( + expr='sum by (pod) (kafka_connect_connect_worker_metrics_task_count{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + G.Target( + expr='sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + G.Target( + expr='sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + ], + transformations=[ + {"id": "seriesToColumns", "options": {"byField": "pod"}}, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "pod", + "app 1", + "start_time_ms", + "version", + 
"Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + "namespace 1", + ] + } + }, + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "app 1": 1, + "namespace 1": 0, + "pod": 2, + "start_time_ms": 3, + "version": 4, + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + "app 1": "cluster", + "namespace 1": "namespace", + "pod": "worker", + "start_time_ms": "start time", + "version": "version", + }, + }, + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + {"destinationType": "number", "targetField": "start_time_ms"} + ], + "fields": {}, + }, + }, + ], + gridPos=G.GridPos(h=defaultHeight, w=24, x=0, y=hc_base + 1), + ), + G.Table( + title="Connectors", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connector_info{namespace="$ns",app="$connect_app"}', + format="table", + instant=True, + ), + G.Target( + expr='sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + G.Target( + expr='sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + G.Target( + expr='sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + G.Target( + expr='sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace="$ns",app="$connect_app"})', + format="table", + instant=True, + ), + ], + transformations=[ + {"id": "seriesToColumns", "options": {"byField": "connector"}}, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + 
"Value #C", + "Value #D", + "Value #E", + ] + } + }, + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused", + }, + }, + }, + ], + gridPos=G.GridPos(h=defaultHeight, w=24, x=0, y=hc_base + 2), + ), + + G.TimeSeries( + title="Tasks Running Ratio", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connector_task_metrics_running_ratio{namespace="$ns",app="$connect_app"}', + legendFormat="{{connector}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=defaultHeight * 2, w=12, x=0, y=hc_base + 3), + ), + G.TimeSeries( + title="Rebalance Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace="$ns",app="$connect_app"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=12, x=12, y=hc_base + 3), + ), ] -system_base=hc_base + 1; -system_panels=[ +system_base = hc_base + 4 +system_panels = [ G.RowPanel( title="System", gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), @@ -185,7 +382,427 @@ ), ] -panels = hc_panels + system_panels +worker_base = system_base + 1 +worker_inner = [ + G.TimeSeries( + title="Incoming Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connect_metrics_incoming_byte_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=worker_base), + ), + G.TimeSeries( + title="Outgoing Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='kafka_connect_connect_metrics_outgoing_byte_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=worker_base), + ), + G.TimeSeries( + title="IO Ratio", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connect_metrics_io_ratio{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=worker_base + 1 + ), + ), + G.TimeSeries( + title="Network IO Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connect_metrics_network_io_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=worker_base + 1 + ), + ), + G.TimeSeries( + title="Active Connections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connect_metrics_connection_count{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=worker_base + 2 + ), + ), + G.TimeSeries( + title="Authentications", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connect_metrics_successful_authentication_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', + legendFormat="{{pod}} (success)", + ), + G.Target( + 
expr='kafka_connect_connect_metrics_failed_authentication_total{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', + legendFormat="{{pod}} (failed)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=worker_base + 2 + ), + ), +] +worker_panels = [ + G.RowPanel( + title="Connect Workers", + gridPos=G.GridPos(h=1, w=24, x=0, y=worker_base), + collapsed=True, + panels=worker_inner, + ), +] + +tasks_base = worker_base + 1 +tasks_inner = [ + G.TimeSeries( + title="Batch Size (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connector_task_metrics_batch_size_avg{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=tasks_base + ), + ), + G.TimeSeries( + title="Batch Size (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connector_task_metrics_batch_size_max{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=tasks_base + ), + ), + + G.TimeSeries( + title="Offset commit success %", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=tasks_base 
+ 1 + ), + ), + G.TimeSeries( + title="Offset commit avg. latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=tasks_base + 1 + ), + ), +] +tasks_panels = [ + G.RowPanel( + title="Tasks", + gridPos=G.GridPos(h=1, w=24, x=0, y=tasks_base), + collapsed=True, + panels=tasks_inner, + ), +] + +task_errors_base = tasks_base + 2 +task_errors_inner = [ + G.TimeSeries( + title="Total Record Failures", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_task_error_metrics_total_record_failures{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=task_errors_base + ), + ), + G.TimeSeries( + title="Total Record Error", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_task_error_metrics_total_record_errors{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=task_errors_base + ), + ), + G.TimeSeries( + title="Total Records Skipped", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_task_error_metrics_total_records_skipped{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + 
legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=task_errors_base + ), + ), + G.TimeSeries( + title="Total Errors Logged", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_task_error_metrics_total_errors_logged{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=task_errors_base + 1 + ), + ), + G.TimeSeries( + title="Total Retries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_task_error_metrics_total_retries{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=task_errors_base + 1 + ), + ), + G.TimeSeries( + title="Dead Letter Topic Requests", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=task_errors_base + 1 + ), + ), +] +task_errors_panels = [ + G.RowPanel( + title="Task Errors", + gridPos=G.GridPos(h=1, w=24, x=0, y=task_errors_base), + collapsed=True, + panels=task_errors_inner, + ), +] + +source_base = task_errors_base + 2 +source_inner = [ + G.TimeSeries( + title="Poll Batch Avg. 
Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=source_base + ), + ), + G.TimeSeries( + title="Poll Batch Max. Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=source_base + ), + ), + G.TimeSeries( + title="Source Record Poll Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_source_task_metrics_source_record_poll_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=source_base + 1 + ), + ), + G.TimeSeries( + title="Source Record Write Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_source_task_metrics_source_record_write_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=source_base + 1 + ), + ), +] +source_panels = [ + G.RowPanel( + title="Source Tasks", + gridPos=G.GridPos(h=1, 
w=24, x=0, y=source_base), + collapsed=True, + panels=source_inner, + ), +] + +sink_base = source_base + 2 +sink_inner = [ + G.TimeSeries( + title="Put Batch Avg. Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=sink_base + ), + ), + G.TimeSeries( + title="Put Batch Max. Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=sink_base + ), + ), + G.TimeSeries( + title="Partition Count", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_connect_sink_task_metrics_partition_count{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=sink_base + 1 + ), + ), +] +sink_panels = [ + G.RowPanel( + title="Sink Tasks", + gridPos=G.GridPos(h=1, w=24, x=0, y=sink_base), + collapsed=True, + panels=sink_inner, + ), +] + +panels = hc_panels + system_panels + tasks_panels + task_errors_panels + source_panels + sink_panels + worker_panels dashboard = G.Dashboard( title="Kafka Connect cluster - v2", diff --git a/grafana-dashboards/kafka-topics.py b/grafana-dashboards/kafka-topics.py index 2fc819c2..0a643ada 100644 --- 
a/grafana-dashboards/kafka-topics.py +++ b/grafana-dashboards/kafka-topics.py @@ -202,7 +202,9 @@ ], filterable=True, transformations=offsets_txs, - gridPos=G.GridPos(h=defaultHeight, w=tableWidth, x=tableWidth * 0, y=offsets_base), + gridPos=G.GridPos( + h=defaultHeight, w=tableWidth, x=tableWidth * 0, y=offsets_base + ), ), G.Table( title="End Offsets", @@ -217,7 +219,9 @@ ], filterable=True, transformations=offsets_txs, - gridPos=G.GridPos(h=defaultHeight, w=tableWidth, x=tableWidth * 1, y=offsets_base), + gridPos=G.GridPos( + h=defaultHeight, w=tableWidth, x=tableWidth * 1, y=offsets_base + ), ), ] offsets_panels = [ diff --git a/grafana-dashboards/schema-registry-cluster.json b/grafana-dashboards/schema-registry-cluster.json index d090a584..973062cd 100644 --- a/grafana-dashboards/schema-registry-cluster.json +++ b/grafana-dashboards/schema-registry-cluster.json @@ -222,7 +222,7 @@ "targets": [ { "datasource": null, - "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$ns\"}) by (schema_type)", + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$ns\"})", "format": "time_series", "hide": false, "instant": true, @@ -237,7 +237,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Registered Schemas by Type", + "title": "SR: Sum of Registered Schemas", "transformations": [], "transparent": false, "type": "stat" @@ -320,7 +320,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Created Schemas", + "title": "SR: Sum of Created Schemas by Type", "transformations": [], "transparent": false, "type": "stat" @@ -403,7 +403,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Deleted Schemas", + "title": "SR: Sum of Deleted Schemas by Type", "transformations": [], "transparent": false, "type": "stat" diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py index cec8e70e..ef8c3bdd 100644 --- a/grafana-dashboards/schema-registry-cluster.py +++ 
b/grafana-dashboards/schema-registry-cluster.py @@ -15,8 +15,8 @@ ] ) -healthcheck_base=0 -healthcheck_panels=[ +healthcheck_base = 0 +healthcheck_panels = [ G.RowPanel( title="Health-check", gridPos=G.GridPos(h=1, w=24, x=0, y=0), @@ -35,14 +35,16 @@ G.Threshold(index=1, value=1.0, color="yellow"), G.Threshold(index=2, value=2.0, color="green"), ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=healthcheck_base), + gridPos=G.GridPos( + h=defaultHeight, w=statWidth, x=statWidth * 0, y=healthcheck_base + ), ), G.Stat( - title="SR: Sum of Registered Schemas by Type", + title="SR: Sum of Registered Schemas", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='avg(kafka_schema_registry_registered_count{namespace="$ns"}) by (schema_type)', + expr='avg(kafka_schema_registry_registered_count{namespace="$ns"})', instant=True, ), ], @@ -50,10 +52,12 @@ thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=healthcheck_base), + gridPos=G.GridPos( + h=defaultHeight, w=statWidth, x=statWidth * 1, y=healthcheck_base + ), ), G.Stat( - title="SR: Sum of Created Schemas", + title="SR: Sum of Created Schemas by Type", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -65,10 +69,12 @@ thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=healthcheck_base), + gridPos=G.GridPos( + h=defaultHeight, w=statWidth, x=statWidth * 2, y=healthcheck_base + ), ), G.Stat( - title="SR: Sum of Deleted Schemas", + title="SR: Sum of Deleted Schemas by Type", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( @@ -80,26 +86,29 @@ thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=healthcheck_base), + gridPos=G.GridPos( + h=defaultHeight, w=statWidth, x=statWidth * 3, y=healthcheck_base + ), ), G.Stat( title="SR: Sum of Active 
Connections", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)', + expr="sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", ), ], reduceCalc="last", thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=healthcheck_base), + gridPos=G.GridPos( + h=defaultHeight, w=statWidth, x=statWidth * 4, y=healthcheck_base + ), ), - ] -system_panels=[ +system_panels = [ G.RowPanel( title="System", gridPos=G.GridPos(h=1, w=24, x=0, y=1), From 6b5cdc2e9b42bb53e70bc0e88b8575bde1687c45 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Fri, 8 Jul 2022 15:53:25 +0100 Subject: [PATCH 18/28] feat: ksqldb dashboard --- grafana-dashboards/Makefile | 1 + grafana-dashboards/ksqldb-cluster.json | 3683 ++++++++++++++++++++++++ grafana-dashboards/ksqldb-cluster.py | 588 ++++ 3 files changed, 4272 insertions(+) create mode 100644 grafana-dashboards/ksqldb-cluster.json create mode 100644 grafana-dashboards/ksqldb-cluster.py diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index f8bb8246..8ddb1cff 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -5,3 +5,4 @@ all: generate-dashboard -o kafka-topics.json kafka-topics.py generate-dashboard -o schema-registry-cluster.json schema-registry-cluster.py generate-dashboard -o kafka-connect-cluster.json kafka-connect-cluster.py + generate-dashboard -o ksqldb-cluster.json ksqldb-cluster.py diff --git a/grafana-dashboards/ksqldb-cluster.json b/grafana-dashboards/ksqldb-cluster.json new file mode 100644 index 00000000..194352c0 --- /dev/null +++ b/grafana-dashboards/ksqldb-cluster.json @@ -0,0 +1,3683 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + 
"list": [] + }, + "description": "Overview of ksqlDB clusters.", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online Servers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$ns\",app=\"$ksqldb_app\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + 
"step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Liveness", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace=\"$ns\",app=\"$ksqldb_app\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages consumed/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", 
+ "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace=\"$ns\",app=\"$ksqldb_app\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages produced/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, 
+ "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",app=\"$ksqldb_app\",type=\"ksqldb\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + 
"timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",app=\"$ksqldb_app\",type=\"ksqldb\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",app=\"$ksqldb_app\",type=\"ksqldb\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + 
"axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Process Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": 
false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Process Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": 
false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Punctuate Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + 
"placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Punctuate Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Queries Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + 
}, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + 
"mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 
10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + 
"cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" 
+ }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [] + }, + "unit": "iops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": 
null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": 
"table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "State Stores", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "ksqldb" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Namespace", + "multi": false, + "name": "ns", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "ksqlDB cluster", + "multi": false, + "name": "ksqldb_app", + "options": [], + "query": 
"label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\"},app)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": true, + "includeAll": false, + "label": "ksqlDB cluster ID", + "multi": false, + "name": "ksqldb_cluster_id", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\"},ksql_cluster)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "ksqlDB server", + "multi": true, + "name": "ksqldb_server", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\",app=\"$ksqldb_app\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "ksqlDB cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/ksqldb-cluster.py b/grafana-dashboards/ksqldb-cluster.py new file mode 100644 index 00000000..ffe91977 --- /dev/null +++ b/grafana-dashboards/ksqldb-cluster.py @@ -0,0 +1,588 @@ + 
+import grafanalib.core as G + +defaultHeight = 5 +statWidth = 4 +tsWidth = 8 + +templating = G.Templating( + list=[ + G.Template( + name="ns", + label="Namespace", + dataSource="Prometheus", + query="label_values(namespace)", + ), + G.Template( + name="ksqldb_app", + label="ksqlDB cluster", + dataSource="Prometheus", + query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns"},app)', + ), + G.Template( + name="ksqldb_cluster_id", + label="ksqlDB cluster ID", + dataSource="Prometheus", + query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns"},ksql_cluster)', + hide=True, + ), + G.Template( + name="ksqldb_server", + label="ksqlDB server", + dataSource="Prometheus", + query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns",app="$ksqldb_app"}, pod)', + multi=True, + includeAll=True, + ), + ] +) + + +hc_base = 0 +hc_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=hc_base), + ), + G.Stat( + title="ksqlDB: Online Servers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=hc_base), + ), + G.Stat( + title="ksqlDB: Sum of Active Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=hc_base), + ), + G.Stat( + title="ksqlDB: Sum of Running Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_running_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + 
reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=hc_base), + ), + G.Stat( + title="ksqlDB: Sum of Rebalancing Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=hc_base), + ), + G.Stat( + title="Connect: Sum of Failed Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(ksql_ksql_engine_query_stats_error_queries{namespace="$ns", app="$ksqldb_app"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=hc_base), + ), + + G.TimeSeries( + title="Cluster Liveness", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='ksql_ksql_engine_query_stats_liveness_indicator{namespace="$ns",app="$ksqldb_app"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=0, y=hc_base + 1), + ), + G.TimeSeries( + title="Messages consumed/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace="$ns",app="$ksqldb_app"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=hc_base + 1), + ), + G.TimeSeries( + title="Messages produced/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + 
G.Target( + expr='ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace="$ns",app="$ksqldb_app"}', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=hc_base + 1), + ), +] + +system_base = hc_base + 2 +system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='irate(process_cpu_seconds_total{namespace="$ns",app="$ksqldb_app",type="ksqldb"}[5m])', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=system_base), + ), + G.TimeSeries( + title="Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(area)(jvm_memory_bytes_used{namespace="$ns",app="$ksqldb_app",type="ksqldb"})', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=system_base), + ), + G.TimeSeries( + title="GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",app="$ksqldb_app",type="ksqldb"}[5m]))', + legendFormat="{{pod}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=system_base), + ), +] + +queries_base = system_base + 1 +queries_inner = [ + G.TimeSeries( + title="Poll Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='kafka_streams_stream_thread_metrics_poll_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=queries_base), + ), + G.TimeSeries( + title="Poll Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_poll_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=queries_base), + ), + + G.TimeSeries( + title="Process Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_process_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=queries_base + 1), + ), + G.TimeSeries( + title="Process Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_process_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=queries_base + 1), + ), + + G.TimeSeries( + title="Commit Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='kafka_streams_stream_thread_metrics_commit_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=queries_base + 2), + ), + G.TimeSeries( + title="Commit Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_commit_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=queries_base + 2), + ), + + G.TimeSeries( + title="Punctuate Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=queries_base + 3), + ), + G.TimeSeries( + title="Punctuate Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=queries_base + 3), + ), +] +queries_panels = [ + G.RowPanel( + title="Queries Performance", + gridPos=G.GridPos(h=1, w=24, x=0, y=queries_base), + collapsed=True, + panels=queries_inner, + ), +] + 
+stores_base = queries_base + 4 +stores_inner = [ + G.TimeSeries( + title="Put Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 0), + ), + G.TimeSeries( + title="Put Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 0), + ), + G.TimeSeries( + title="Put Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 0), + ), + + G.TimeSeries( + title="Put if absent Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_if_absent_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 1), + ), + G.TimeSeries( + title="Put if absent Latency (Avg.)", + 
dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 1), + ), + G.TimeSeries( + title="Put if absent Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 1), + ), + + G.TimeSeries( + title="Fetch Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_fetch_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 2), + ), + G.TimeSeries( + title="Fetch Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_fetch_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 2), + ), + G.TimeSeries( + title="Fetch Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='kafka_streams_stream_state_metrics_fetch_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 2), + ), + + G.TimeSeries( + title="Delete Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_delete_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 3), + ), + G.TimeSeries( + title="Delete Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_delete_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 3), + ), + G.TimeSeries( + title="Delete Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_delete_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 3), + ), + + G.TimeSeries( + title="Restore Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='kafka_streams_stream_state_metrics_restore_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 4), + ), + G.TimeSeries( + title="Restore Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_restore_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 4), + ), + G.TimeSeries( + title="Restore Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_restore_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 4), + ), +] +stores_panels = [ + G.RowPanel( + title="State Stores", + gridPos=G.GridPos(h=1, w=24, x=0, y=stores_base), + collapsed=True, + panels=stores_inner, + ), +] + +panels = hc_panels + system_panels + queries_panels + stores_panels + +dashboard = G.Dashboard( + title="ksqlDB cluster - v2", + description="Overview of ksqlDB clusters.", + tags=[ + "confluent", + "ksqldb", + ], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", +).auto_panel_ids() From 68d6c3c71dc2f9114baee35892554f5f4ec1b2c7 Mon Sep 17 
00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Sat, 9 Jul 2022 00:12:40 +0100 Subject: [PATCH 19/28] feat: add variables --- grafana-dashboards/Makefile | 38 +- .../{ => cfk}/confluent-platform.json | 80 +- .../{ => cfk}/kafka-cluster.json | 118 +- .../{ => cfk}/kafka-connect-cluster.json | 128 +- .../{ => cfk}/kafka-topics.json | 22 +- .../{ => cfk}/ksqldb-cluster.json | 92 +- .../{ => cfk}/schema-registry-cluster.json | 47 +- .../{ => cfk}/zookeeper-cluster.json | 67 +- grafana-dashboards/confluent-platform.py | 1039 ++-- .../default/confluent-platform.json | 2640 ++++++++ grafana-dashboards/default/kafka-cluster.json | 5537 +++++++++++++++++ .../default/kafka-connect-cluster.json | 4184 +++++++++++++ grafana-dashboards/default/kafka-topics.json | 1085 ++++ .../default/ksqldb-cluster.json | 3683 +++++++++++ .../default/schema-registry-cluster.json | 937 +++ .../default/zookeeper-cluster.json | 1873 ++++++ grafana-dashboards/kafka-cluster.py | 2049 +++--- grafana-dashboards/kafka-connect-cluster.py | 1846 +++--- grafana-dashboards/kafka-topics.py | 484 +- grafana-dashboards/ksqldb-cluster.py | 1164 ++-- grafana-dashboards/schema-registry-cluster.py | 344 +- grafana-dashboards/zookeeper-cluster.py | 653 +- 22 files changed, 24478 insertions(+), 3632 deletions(-) rename grafana-dashboards/{ => cfk}/confluent-platform.json (97%) rename grafana-dashboards/{ => cfk}/kafka-cluster.json (97%) rename grafana-dashboards/{ => cfk}/kafka-connect-cluster.json (95%) rename grafana-dashboards/{ => cfk}/kafka-topics.json (97%) rename grafana-dashboards/{ => cfk}/ksqldb-cluster.json (96%) rename grafana-dashboards/{ => cfk}/schema-registry-cluster.json (95%) rename grafana-dashboards/{ => cfk}/zookeeper-cluster.json (97%) create mode 100644 grafana-dashboards/default/confluent-platform.json create mode 100644 grafana-dashboards/default/kafka-cluster.json create mode 100644 grafana-dashboards/default/kafka-connect-cluster.json create mode 100644 
grafana-dashboards/default/kafka-topics.json create mode 100644 grafana-dashboards/default/ksqldb-cluster.json create mode 100644 grafana-dashboards/default/schema-registry-cluster.json create mode 100644 grafana-dashboards/default/zookeeper-cluster.json diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index 8ddb1cff..e37f2f44 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -1,8 +1,30 @@ -all: - generate-dashboard -o confluent-platform.json confluent-platform.py - generate-dashboard -o zookeeper-cluster.json zookeeper-cluster.py - generate-dashboard -o kafka-cluster.json kafka-cluster.py - generate-dashboard -o kafka-topics.json kafka-topics.py - generate-dashboard -o schema-registry-cluster.json schema-registry-cluster.py - generate-dashboard -o kafka-connect-cluster.json kafka-connect-cluster.py - generate-dashboard -o ksqldb-cluster.json ksqldb-cluster.py +all: + $(MAKE) def + $(MAKE) cfk + +def: OUTPUT_DIR=default +def: export ENV_LABEL=env +def: export SERVER_LABEL=hostname +def: export KSQLDB_CLUSTER_LABEL=ksqldb_cluster_id +def: export CONNECT_CLUSTER_LABEL=kafka_connect_cluster_id +.PHONY: def +def: dashboards + +OUTPUT_DIR=default +dashboards: + @mkdir -p $(OUTPUT_DIR) + @generate-dashboard confluent-platform.py -o $(OUTPUT_DIR)/confluent-platform.json + @generate-dashboard zookeeper-cluster.py -o $(OUTPUT_DIR)/zookeeper-cluster.json + @generate-dashboard kafka-cluster.py -o $(OUTPUT_DIR)/kafka-cluster.json + @generate-dashboard kafka-topics.py -o $(OUTPUT_DIR)/kafka-topics.json + @generate-dashboard schema-registry-cluster.py -o $(OUTPUT_DIR)/schema-registry-cluster.json + @generate-dashboard kafka-connect-cluster.py -o $(OUTPUT_DIR)/kafka-connect-cluster.json + @generate-dashboard ksqldb-cluster.py -o $(OUTPUT_DIR)/ksqldb-cluster.json + +cfk: OUTPUT_DIR=cfk +cfk: export ENV_LABEL=namespace +cfk: export SERVER_LABEL=pod +cfk: export KSQLDB_CLUSTER_LABEL=app +cfk: export CONNECT_CLUSTER_LABEL=app +.PHONY: 
cfk +cfk: dashboards diff --git a/grafana-dashboards/confluent-platform.json b/grafana-dashboards/cfk/confluent-platform.json similarity index 97% rename from grafana-dashboards/confluent-platform.json rename to grafana-dashboards/cfk/confluent-platform.json index 41710d5e..1605d89b 100644 --- a/grafana-dashboards/confluent-platform.json +++ b/grafana-dashboards/cfk/confluent-platform.json @@ -139,7 +139,7 @@ "targets": [ { "datasource": null, - "expr": "count(zookeeper_status_quorumsize{namespace=\"$ns\"})", + "expr": "count(zookeeper_status_quorumsize{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -222,7 +222,7 @@ "targets": [ { "datasource": null, - "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$ns\"})", + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -305,7 +305,7 @@ "targets": [ { "datasource": null, - "expr": "sum(zookeeper_numaliveconnections{namespace=\"$ns\"})", + "expr": "sum(zookeeper_numaliveconnections{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -388,7 +388,7 @@ "targets": [ { "datasource": null, - "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$ns\"})", + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -487,7 +487,7 @@ "targets": [ { "datasource": null, - "expr": "zookeeper_outstandingrequests{namespace=\"$ns\"}", + "expr": "zookeeper_outstandingrequests{namespace=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -611,7 +611,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$ns\"})", + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -694,7 +694,7 @@ "targets": [ { "datasource": null, - "expr":
"kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$ns\"} > 0", + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$env\"} > 0", "format": "time_series", "hide": false, "instant": false, @@ -777,7 +777,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$ns\"})", + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -792,7 +792,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions", + "title": "Kafka: Sum of Partitions", "transformations": [], "transparent": false, "type": "stat" @@ -868,7 +868,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$ns\"})", + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -959,7 +959,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$ns\"})", + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -974,7 +974,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Under-MinISR", + "title": "Kafka: Sum of Partitions Under-MinISR", "transformations": [], "transparent": false, "type": "stat" @@ -1050,7 +1050,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$ns\"})", + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -1190,7 +1190,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "expr":
"count(kafka_schema_registry_registered_count{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -1273,7 +1273,7 @@ "targets": [ { "datasource": null, - "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": true, @@ -1356,7 +1356,7 @@ "targets": [ { "datasource": null, - "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$ns\"}) by (schema_type)", + "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$env\"}) by (schema_type)", "format": "time_series", "hide": false, "instant": false, @@ -1439,7 +1439,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"}) by (schema_type)", + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$env\"}) by (schema_type)", "format": "time_series", "hide": false, "instant": false, @@ -1552,7 +1552,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\",app=~\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -1635,7 +1635,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -1726,7 +1726,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "expr": 
"sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -1817,7 +1817,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -1908,7 +1908,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",app=~\"$connect_app\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -1991,7 +1991,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=~\"$connect_app\"} >= 0", + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$env\",app=~\"$connect_cluster\"} >= 0", "format": "time_series", "hide": false, "instant": false, @@ -2012,13 +2012,13 @@ "type": "stat" } ], - "repeat": "connect_app", + "repeat": "connect_cluster", "repeatDirection": null, "span": null, "targets": [], "timeFrom": null, "timeShift": null, - "title": "Kafka Connect cluster: $connect_app", + "title": "Kafka Connect cluster: $connect_cluster", "transformations": [], "transparent": false, "type": "row" @@ -2116,7 +2116,7 @@ "targets": [ { "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ 
-2199,7 +2199,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2290,7 +2290,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2381,7 +2381,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2472,7 +2472,7 @@ "targets": [ { "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2493,13 +2493,13 @@ "type": "stat" } ], - "repeat": "ksqldb_app", + "repeat": "ksqldb_cluster", "repeatDirection": null, "span": null, "targets": [], "timeFrom": null, "timeShift": null, - "title": "ksqlDB cluster: $ksqldb_app", + "title": "ksqlDB cluster: $ksqldb_cluster", "transformations": [], "transparent": false, "type": "row" @@ -2534,9 +2534,9 @@ "datasource": "Prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], "query": "label_values(namespace)", "refresh": 1, @@ -2559,13 +2559,13 @@ "value": null }, "datasource": "Prometheus", - "hide": 2, + "hide": true, 
"includeAll": false, "label": "Kafka Connect cluster", "multi": false, - "name": "connect_app", + "name": "connect_cluster", "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\"}, app)", + "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\"}, app)", "refresh": 1, "regex": null, "sort": 1, @@ -2586,13 +2586,13 @@ "value": null }, "datasource": "Prometheus", - "hide": 2, + "hide": true, "includeAll": false, "label": "ksqlDB cluster", "multi": false, - "name": "ksqldb_app", + "name": "ksqldb_cluster", "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\"},app)", + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$env\"}, app)", "refresh": 1, "regex": null, "sort": 1, diff --git a/grafana-dashboards/kafka-cluster.json b/grafana-dashboards/cfk/kafka-cluster.json similarity index 97% rename from grafana-dashboards/kafka-cluster.json rename to grafana-dashboards/cfk/kafka-cluster.json index 240803ed..9e2add0b 100644 --- a/grafana-dashboards/kafka-cluster.json +++ b/grafana-dashboards/cfk/kafka-cluster.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Health-check", + "title": "Overview", "transformations": [], "transparent": false, "type": "row" @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$ns\"})", + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -206,7 +206,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$ns\"} > 0", + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$env\"} > 0", "format": "time_series", "hide": false, "instant": false, @@ -289,7 +289,7 @@ "targets": [ { "datasource": 
null, - "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace=\"$ns\"})", + "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -372,7 +372,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{namespace=\"$ns\"})", + "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -455,7 +455,7 @@ "targets": [ { "datasource": null, - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -538,7 +538,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_log_log_size{namespace=\"$ns\",pod=~\"$broker\"}) by (pod)", + "expr": "sum(kafka_log_log_size{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", "format": "time_series", "hide": false, "instant": false, @@ -621,7 +621,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -712,7 +712,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -803,7 +803,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": 
"sum(kafka_cluster_partition_underminisr{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -894,7 +894,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -977,7 +977,7 @@ "targets": [ { "datasource": null, - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -1060,7 +1060,7 @@ "targets": [ { "datasource": null, - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -1201,7 +1201,7 @@ "targets": [ { "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",type=\"kafka\"}[5m])", + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$broker\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -1301,7 +1301,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",type=\"kafka\"})", + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -1401,7 +1401,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",type=\"kafka\"}[5m]))", + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$broker\"}[5m]))", 
"format": "time_series", "hide": false, "instant": false, @@ -1531,7 +1531,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -1631,7 +1631,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -1731,7 +1731,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -1873,7 +1873,7 @@ "targets": [ { "datasource": null, - "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$ns\",pod=~\"$broker\"}", + "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$env\",pod=~\"$broker\"}", "format": "time_series", "hide": false, "instant": false, @@ -1973,7 +1973,7 @@ "targets": [ { "datasource": null, - "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$ns\",pod=~\"$broker\"}", + "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$env\",pod=~\"$broker\"}", "format": "time_series", "hide": false, "instant": false, @@ -2118,7 +2118,7 @@ "targets": [ { "datasource": null, - "expr": "sum 
without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m]))", + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -2221,7 +2221,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace=\"$ns\",pod=~\"$broker\",error!=\"NONE\"}[5m]))", + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace=\"$env\",pod=~\"$broker\",error!=\"NONE\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -2363,7 +2363,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$ns\",pod=~\"$broker\"}) by (pod)", + "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", "format": "time_series", "hide": false, "instant": false, @@ -2463,7 +2463,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$ns\",pod=~\"$broker\"}) by (pod)", + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", "format": "time_series", "hide": false, "instant": false, @@ -2563,7 +2563,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$ns\",pod=~\"$broker\"}) by (pod)", + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", "format": "time_series", "hide": false, "instant": false, @@ -2663,7 +2663,7 @@ "targets": [ { "datasource": null, - "expr": 
"sum(kafka_server_socketservermetrics_connection_count{namespace=\"$ns\",pod=~\"$broker\"}) by (listener)", + "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", "format": "time_series", "hide": false, "instant": false, @@ -2763,7 +2763,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$ns\",pod=~\"$broker\"}) by (listener)", + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", "format": "time_series", "hide": false, "instant": false, @@ -2863,7 +2863,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$ns\",pod=~\"$broker\"}) by (listener)", + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", "format": "time_series", "hide": false, "instant": false, @@ -3005,7 +3005,7 @@ "targets": [ { "datasource": null, - "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m])", + "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{namespace=\"$env\",pod=~\"$broker\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -3105,7 +3105,7 @@ "targets": [ { "datasource": null, - "expr": "rate(kafka_server_replicamanager_isrexpandspersec{namespace=\"$ns\",pod=~\"$broker\"}[5m])", + "expr": "rate(kafka_server_replicamanager_isrexpandspersec{namespace=\"$env\",pod=~\"$broker\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -3247,7 +3247,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", "format": "time_series", "hide": false, "instant": false, @@ -3347,7 +3347,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", "format": "time_series", "hide": false, "instant": false, @@ -3447,7 +3447,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", "format": "time_series", "hide": false, "instant": false, @@ -3547,7 +3547,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", "format": "time_series", "hide": false, "instant": false, @@ -3647,7 +3647,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", "format": "time_series", "hide": false, "instant": false, @@ -3789,7 +3789,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", "format": "time_series", "hide": false, "instant": false, @@ -3889,7 +3889,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", "format": "time_series", "hide": false, "instant": false, @@ -3989,7 +3989,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", "format": "time_series", "hide": false, "instant": false, @@ -4089,7 +4089,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", "format": "time_series", "hide": false, "instant": false, @@ -4189,7 +4189,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", "format": "time_series", "hide": false, "instant": false, @@ -4331,7 +4331,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", "format": "time_series", "hide": false, "instant": false, @@ -4431,7 +4431,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", "format": "time_series", "hide": false, "instant": false, @@ -4531,7 +4531,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", "format": "time_series", "hide": false, "instant": false, @@ -4631,7 +4631,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", "format": "time_series", "hide": false, "instant": false, @@ -4731,7 +4731,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", "format": "time_series", "hide": false, "instant": false, @@ -4873,7 +4873,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{namespace=\"$ns\",pod=~\"$broker\"}", + "expr": 
"kafka_coordinator_group_groupmetadatamanager_numgroups{namespace=\"$env\",pod=~\"$broker\"}", "format": "time_series", "hide": false, "instant": false, @@ -4975,7 +4975,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -4989,7 +4989,7 @@ }, { "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -5003,7 +5003,7 @@ }, { "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -5017,7 +5017,7 @@ }, { "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -5031,7 +5031,7 @@ }, { "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -5173,7 +5173,7 @@ "targets": [ { "datasource": null, - "expr": 
"sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -5273,7 +5273,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace=\"$ns\",pod=~\"$broker\"})", + "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -5373,7 +5373,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connections{namespace=\"$ns\",pod=~\"$broker\"}) by (client_software_name,client_software_version)", + "expr": "sum(kafka_server_socketservermetrics_connections{namespace=\"$env\",pod=~\"$broker\"}) by (client_software_name,client_software_version)", "format": "time_series", "hide": false, "instant": false, @@ -5431,9 +5431,9 @@ "datasource": "Prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], "query": "label_values(namespace)", "refresh": 1, @@ -5462,7 +5462,7 @@ "multi": true, "name": "broker", "options": [], - "query": "label_values(kafka_server_replicamanager_leadercount{namespace=\"$ns\"}, pod)", + "query": "label_values(kafka_server_replicamanager_leadercount{namespace=\"$env\"}, pod)", "refresh": 1, "regex": null, "sort": 1, @@ -5489,7 +5489,7 @@ "multi": false, "name": "quantile", "options": [], - "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\"}, quantile)", + "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\"}, quantile)", "refresh": 1, "regex": null, "sort": 1, diff --git a/grafana-dashboards/kafka-connect-cluster.json 
b/grafana-dashboards/cfk/kafka-connect-cluster.json similarity index 95% rename from grafana-dashboards/kafka-connect-cluster.json rename to grafana-dashboards/cfk/kafka-connect-cluster.json index 0ccbb907..3f2b4934 100644 --- a/grafana-dashboards/kafka-connect-cluster.json +++ b/grafana-dashboards/cfk/kafka-connect-cluster.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Health-check", + "title": "Overview", "transformations": [], "transparent": false, "type": "row" @@ -115,7 +115,7 @@ "fields": "", "values": false }, - "textMode": "value_and_name" + "textMode": "auto" }, "repeat": null, "repeatDirection": null, @@ -123,13 +123,13 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_connect_app_info{namespace=\"$ns\",app=\"$connect_app\",version!=\"\"})", + "expr": "count(kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",version!=\"\"})", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{version}}", + "legendFormat": "", "metric": "", "refId": "", "step": 10, @@ -206,7 +206,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -297,7 +297,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -388,7 +388,7 @@ "targets": [ { "datasource": null, - "expr": 
"sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -479,7 +479,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -562,7 +562,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=\"$connect_app\"} >= 0", + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$env\",app=\"$connect_cluster\"} >= 0", "format": "time_series", "hide": false, "instant": false, @@ -631,7 +631,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_app_info{namespace=\"$ns\",app=\"$connect_app\",start_time_ms!=\"\"}", + "expr": "kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",start_time_ms!=\"\"}", "format": "table", "hide": false, "instant": true, @@ -645,7 +645,7 @@ }, { "datasource": null, - "expr": "kafka_connect_app_info{namespace=\"$ns\",app=\"$connect_app\",version!=\"\"}", + "expr": "kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",version!=\"\"}", "format": "table", "hide": false, "instant": true, @@ -659,7 +659,7 @@ }, { "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -673,7 +673,7 @@ }, 
{ "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -687,7 +687,7 @@ }, { "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -701,7 +701,7 @@ }, { "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -715,7 +715,7 @@ }, { "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -729,7 +729,7 @@ }, { "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -791,24 +791,12 @@ "Value #G": "tasks success", "Value #H": "tasks failure", "app 1": "cluster", - "namespace 1": "namespace", + "namespace 1": "environment", "pod": "worker", "start_time_ms": "start time", 
"version": "version" } } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "start_time_ms" - } - ], - "fields": {} - } } ], "transparent": false, @@ -863,7 +851,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connector_info{namespace=\"$ns\",app=\"$connect_app\"}", + "expr": "kafka_connect_connector_info{namespace=\"$env\",app=\"$connect_cluster\"}", "format": "table", "hide": false, "instant": true, @@ -877,7 +865,7 @@ }, { "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -891,7 +879,7 @@ }, { "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -905,7 +893,7 @@ }, { "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, "instant": true, @@ -919,7 +907,7 @@ }, { "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",app=\"$connect_app\"})", + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", "format": "table", "hide": false, 
"instant": true, @@ -1052,7 +1040,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connector_task_metrics_running_ratio{namespace=\"$ns\",app=\"$connect_app\"}", + "expr": "kafka_connect_connector_task_metrics_running_ratio{namespace=\"$env\",app=\"$connect_cluster\"}", "format": "time_series", "hide": false, "instant": false, @@ -1152,7 +1140,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace=\"$ns\",app=\"$connect_app\"}", + "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\"}", "format": "time_series", "hide": false, "instant": false, @@ -1293,7 +1281,7 @@ "targets": [ { "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"}[5m])", + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -1393,7 +1381,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"})", + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"})", "format": "time_series", "hide": false, "instant": false, @@ -1493,7 +1481,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",app=\"$connect_app\",type=\"connect\"}[5m]))", + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -1623,7 +1611,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connector_task_metrics_batch_size_avg{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + 
"expr": "kafka_connect_connector_task_metrics_batch_size_avg{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -1723,7 +1711,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -1823,7 +1811,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -1923,7 +1911,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2065,7 +2053,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_record_failures{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_task_error_metrics_total_record_failures{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, 
"instant": false, @@ -2165,7 +2153,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_record_errors{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_task_error_metrics_total_record_errors{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2265,7 +2253,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_records_skipped{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_task_error_metrics_total_records_skipped{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2365,7 +2353,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_errors_logged{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_task_error_metrics_total_errors_logged{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2465,7 +2453,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_retries{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_task_error_metrics_total_retries{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2565,7 +2553,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": 
"kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2707,7 +2695,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2807,7 +2795,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2907,7 +2895,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -3007,7 +2995,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ 
-3149,7 +3137,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -3249,7 +3237,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -3349,7 +3337,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_sink_task_metrics_partition_count{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_partition_count{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -3491,7 +3479,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_metrics_incoming_byte_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "expr": "kafka_connect_connect_metrics_incoming_byte_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, @@ -3591,7 +3579,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": 
"time_series", "hide": false, "instant": false, @@ -3691,7 +3679,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_metrics_io_ratio{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "expr": "kafka_connect_connect_metrics_io_ratio{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, @@ -3791,7 +3779,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_metrics_network_io_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "expr": "kafka_connect_connect_metrics_network_io_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, @@ -3891,7 +3879,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_metrics_connection_count{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "expr": "kafka_connect_connect_metrics_connection_count{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, @@ -3991,7 +3979,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_connect_metrics_successful_authentication_rate{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "expr": "kafka_connect_connect_metrics_successful_authentication_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, @@ -4005,7 +3993,7 @@ }, { "datasource": null, - "expr": "kafka_connect_connect_metrics_failed_authentication_total{namespace=\"$ns\",app=\"$connect_app\",pod=~\"$connect_worker\"}", + "expr": "kafka_connect_connect_metrics_failed_authentication_total{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, @@ -4063,9 +4051,9 @@ "datasource": "Prometheus", "hide": 0, "includeAll": 
false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], "query": "label_values(namespace)", "refresh": 1, @@ -4092,9 +4080,9 @@ "includeAll": false, "label": "Connect cluster", "multi": false, - "name": "connect_app", + "name": "connect_cluster", "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\"}, app)", + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\"}, app)", "refresh": 1, "regex": null, "sort": 1, @@ -4121,7 +4109,7 @@ "multi": true, "name": "connect_worker", "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=\"$connect_app\"}, pod)", + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"}, pod)", "refresh": 1, "regex": null, "sort": 1, @@ -4148,7 +4136,7 @@ "multi": true, "name": "connector", "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=\"$connect_app\"}, connector)", + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"}, connector)", "refresh": 1, "regex": null, "sort": 1, diff --git a/grafana-dashboards/kafka-topics.json b/grafana-dashboards/cfk/kafka-topics.json similarity index 97% rename from grafana-dashboards/kafka-topics.json rename to grafana-dashboards/cfk/kafka-topics.json index d241d624..19c187b5 100644 --- a/grafana-dashboards/kafka-topics.json +++ b/grafana-dashboards/cfk/kafka-topics.json @@ -142,7 +142,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "expr": "topk(10, sum 
without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", "format": "time_series", "hide": false, "instant": false, @@ -244,7 +244,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum(kafka_log_log_size{namespace=\"$ns\",topic=~\"$topic\"}) by (topic))", + "expr": "topk(10, sum(kafka_log_log_size{namespace=\"$env\",topic=~\"$topic\"}) by (topic))", "format": "time_series", "hide": false, "instant": false, @@ -346,7 +346,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", "format": "time_series", "hide": false, "instant": false, @@ -448,7 +448,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", "format": "time_series", "hide": false, "instant": false, @@ -550,7 +550,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$ns\", topic=~\"$topic\"}[5m])) by (topic))", + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", "format": "time_series", "hide": false, "instant": false, @@ -652,7 +652,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, 
sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$ns\", topic=~\"$topic\"}[5m])) by (topic))", + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", "format": "time_series", "hide": false, "instant": false, @@ -751,7 +751,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_log_log_logstartoffset{namespace=\"$ns\",topic=~\"$topic\"}", + "expr": "kafka_log_log_logstartoffset{namespace=\"$env\",topic=~\"$topic\"}", "format": "table", "hide": false, "instant": true, @@ -884,7 +884,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_log_log_logendoffset{namespace=\"$ns\",topic=~\"$topic\"}", + "expr": "kafka_log_log_logendoffset{namespace=\"$env\",topic=~\"$topic\"}", "format": "table", "hide": false, "instant": true, @@ -1006,9 +1006,9 @@ "datasource": "Prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], "query": "label_values(namespace)", "refresh": 1, @@ -1037,7 +1037,7 @@ "multi": true, "name": "topic", "options": [], - "query": "label_values(kafka_log_log_size{namespace=\"$ns\"}, topic)", + "query": "label_values(kafka_log_log_size{namespace=\"$env\"}, topic)", "refresh": 1, "regex": null, "sort": 1, diff --git a/grafana-dashboards/ksqldb-cluster.json b/grafana-dashboards/cfk/ksqldb-cluster.json similarity index 96% rename from grafana-dashboards/ksqldb-cluster.json rename to grafana-dashboards/cfk/ksqldb-cluster.json index 194352c0..56b4f887 100644 --- a/grafana-dashboards/ksqldb-cluster.json +++ b/grafana-dashboards/cfk/ksqldb-cluster.json @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": 
"time_series", "hide": false, "instant": false, @@ -206,7 +206,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -297,7 +297,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -388,7 +388,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -479,7 +479,7 @@ "targets": [ { "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", app=\"$ksqldb_app\"})", + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -579,7 +579,7 @@ "targets": [ { "datasource": null, - "expr": "ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$ns\",app=\"$ksqldb_app\"}", + "expr": "ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$env\",app=\"$ksqldb_cluster\"}", "format": "time_series", "hide": false, "instant": false, @@ -679,7 +679,7 @@ "targets": [ { "datasource": null, - "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace=\"$ns\",app=\"$ksqldb_app\"}", + "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace=\"$env\",app=\"$ksqldb_cluster\"}", "format": "time_series", "hide": false, "instant": 
false, @@ -779,7 +779,7 @@ "targets": [ { "datasource": null, - "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace=\"$ns\",app=\"$ksqldb_app\"}", + "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace=\"$env\",app=\"$ksqldb_cluster\"}", "format": "time_series", "hide": false, "instant": false, @@ -920,7 +920,7 @@ "targets": [ { "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",app=\"$ksqldb_app\",type=\"ksqldb\"}[5m])", + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -1020,7 +1020,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",app=\"$ksqldb_app\",type=\"ksqldb\"})", + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"})", "format": "time_series", "hide": false, "instant": false, @@ -1120,7 +1120,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",app=\"$ksqldb_app\",type=\"ksqldb\"}[5m]))", + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -1250,7 +1250,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1350,7 +1350,7 @@ "targets": [ { "datasource": null, - "expr": 
"kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1450,7 +1450,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1550,7 +1550,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1650,7 +1650,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1750,7 +1750,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + 
"expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1850,7 +1850,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1950,7 +1950,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2054,7 +2054,7 @@ "mode": "absolute", "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, @@ -2092,7 +2092,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2192,7 +2192,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_state_metrics_put_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2292,7 +2292,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2354,7 +2354,7 @@ "mode": "absolute", "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, @@ -2392,7 +2392,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2492,7 +2492,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2592,7 +2592,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2654,7 +2654,7 @@ "mode": "absolute", "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, @@ -2692,7 +2692,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2792,7 +2792,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2892,7 +2892,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2954,7 +2954,7 @@ "mode": "absolute", "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, @@ -2992,7 +2992,7 @@ "targets": [ { "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_delete_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3092,7 +3092,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3192,7 +3192,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3254,7 +3254,7 @@ "mode": "absolute", "steps": [] }, - "unit": "iops" + "unit": "ops" }, "overrides": [] }, @@ -3292,7 +3292,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3392,7 +3392,7 @@ "targets": [ { "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_restore_latency_avg{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3492,7 +3492,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_latency_max{namespace=\"$ns\",app=\"$ksqldb_app\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3550,9 +3550,9 @@ "datasource": "Prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], "query": "label_values(namespace)", "refresh": 1, @@ -3579,9 +3579,9 @@ "includeAll": false, "label": "ksqlDB cluster", "multi": false, - "name": "ksqldb_app", + "name": "ksqldb_cluster", "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\"},app)", + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\"},app)", "refresh": 1, "regex": null, "sort": 1, @@ -3602,13 +3602,13 @@ "value": null }, "datasource": "Prometheus", - "hide": true, + "hide": 2, "includeAll": false, "label": "ksqlDB cluster ID", "multi": false, "name": "ksqldb_cluster_id", "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\"},ksql_cluster)", + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\"},ksql_cluster)", "refresh": 1, "regex": null, "sort": 1, @@ -3635,7 +3635,7 @@ "multi": true, "name": 
"ksqldb_server", "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\",app=\"$ksqldb_app\"}, pod)", + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"}, pod)", "refresh": 1, "regex": null, "sort": 1, diff --git a/grafana-dashboards/schema-registry-cluster.json b/grafana-dashboards/cfk/schema-registry-cluster.json similarity index 95% rename from grafana-dashboards/schema-registry-cluster.json rename to grafana-dashboards/cfk/schema-registry-cluster.json index 973062cd..760aaf71 100644 --- a/grafana-dashboards/schema-registry-cluster.json +++ b/grafana-dashboards/cfk/schema-registry-cluster.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Health-check", + "title": "Overview", "transformations": [], "transparent": false, "type": "row" @@ -139,7 +139,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "expr": "count(kafka_schema_registry_registered_count{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -222,7 +222,7 @@ "targets": [ { "datasource": null, - "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$ns\"})", + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": true, @@ -305,7 +305,7 @@ "targets": [ { "datasource": null, - "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$ns\"}) by (schema_type)", + "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$env\"}) by (schema_type)", "format": "time_series", "hide": false, "instant": false, @@ -388,7 +388,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"}) by (schema_type)", + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$env\"}) by (schema_type)", "format": "time_series", 
"hide": false, "instant": false, @@ -612,7 +612,7 @@ "targets": [ { "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",type=\"schemaregistry\"}[5m])", + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$sr_server\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -712,7 +712,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",type=\"schemaregistry\"})", + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$sr_server\"})", "format": "time_series", "hide": false, "instant": false, @@ -812,7 +812,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",type=\"schemaregistry\"}[5m]))", + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$sr_server\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -858,9 +858,9 @@ "datasource": "Prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], "query": "label_values(namespace)", "refresh": 1, @@ -870,6 +870,33 @@ "tagsQuery": null, "type": "query", "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "sr_server", + "options": [], + "query": "label_values(kafka_schema_registry_registered_count{namespace=\"$env\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false } ] }, diff --git a/grafana-dashboards/zookeeper-cluster.json b/grafana-dashboards/cfk/zookeeper-cluster.json similarity index 97% rename from 
grafana-dashboards/zookeeper-cluster.json rename to grafana-dashboards/cfk/zookeeper-cluster.json index e9d5bd1a..235e17c0 100644 --- a/grafana-dashboards/zookeeper-cluster.json +++ b/grafana-dashboards/cfk/zookeeper-cluster.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Health-check", + "title": "Overview", "transformations": [], "transparent": false, "type": "row" @@ -139,7 +139,7 @@ "targets": [ { "datasource": null, - "expr": "count(zookeeper_status_quorumsize{namespace=\"$ns\"})", + "expr": "count(zookeeper_status_quorumsize{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -222,7 +222,7 @@ "targets": [ { "datasource": null, - "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$ns\"})", + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -305,7 +305,7 @@ "targets": [ { "datasource": null, - "expr": "sum(zookeeper_numaliveconnections{namespace=\"$ns\"})", + "expr": "sum(zookeeper_numaliveconnections{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -388,7 +388,7 @@ "targets": [ { "datasource": null, - "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$ns\"})", + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -487,7 +487,7 @@ "targets": [ { "datasource": null, - "expr": "zookeeper_outstandingrequests{namespace=\"$ns\"}", + "expr": "zookeeper_outstandingrequests{namespace=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -628,7 +628,7 @@ "targets": [ { "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",type=\"zookeeper\"}[5m])", + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$zk_server\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -728,7 +728,7 @@ "targets": [ { 
"datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",type=\"zookeeper\"})", + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$zk_server\"})", "format": "time_series", "hide": false, "instant": false, @@ -828,7 +828,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",type=\"zookeeper\"}[5m]))", + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$zk_server\"}[5m]))", "format": "time_series", "hide": false, "instant": false, @@ -958,7 +958,7 @@ "targets": [ { "datasource": null, - "expr": "zookeeper_minrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "expr": "zookeeper_minrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", "format": "time_series", "hide": false, "instant": false, @@ -1058,7 +1058,7 @@ "targets": [ { "datasource": null, - "expr": "zookeeper_avgrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "expr": "zookeeper_avgrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", "format": "time_series", "hide": false, "instant": false, @@ -1158,7 +1158,7 @@ "targets": [ { "datasource": null, - "expr": "zookeeper_maxrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", + "expr": "zookeeper_maxrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", "format": "time_series", "hide": false, "instant": false, @@ -1300,7 +1300,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$ns\",quantile=~\"$quantile\"}", + "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$env\",quantile=~\"$quantile\"}", "format": "time_series", "hide": false, "instant": false, @@ -1402,7 +1402,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace=\"$ns\"}", + "expr":
"kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -1504,7 +1504,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace=\"$ns\"}", + "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -1606,7 +1606,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace=\"$ns\"}", + "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -1708,7 +1708,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace=\"$ns\"}", + "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -1767,9 +1767,9 @@ "datasource": "Prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], "query": "label_values(namespace)", "refresh": 1, @@ -1780,6 +1780,33 @@ "type": "query", "useTags": false }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "zk_server", + "options": [], + "query": "label_values(zookeeper_outstandingrequests{namespace=\"$env\"}, pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, { "allValue": null, "auto": false, @@ -1798,7 +1825,7 @@ "multi": false, "name": "quantile", "options": [], - 
"query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$ns\"}, quantile)", + "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$env\"}, quantile)", "refresh": 1, "regex": null, "sort": 1, diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index 15ec7b2e..1de19e9f 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -1,488 +1,583 @@ +import os import grafanalib.core as G -defaultHeight = 5 -statWidth = 4 -templating = G.Templating( - list=[ - G.Template( - name="ns", - label="Namespace", - dataSource="Prometheus", - query="label_values(namespace)", +def dashboard( + env_label="namespace", + server_label="pod", + connect_cluster_label="app", + ksqldb_cluster_label="app", +): + default_height = 5 + stat_width = 4 + + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values(" + env_label + ")", + ), + G.Template( + name="connect_cluster", + label="Kafka Connect cluster", + dataSource="Prometheus", + query="label_values(kafka_connect_connect_worker_metrics_connector_count{" + + env_label + + '="$env"}, ' + + connect_cluster_label + + ")", + hide=True, + ), + G.Template( + name="ksqldb_cluster", + label="ksqlDB cluster", + dataSource="Prometheus", + query="label_values(ksql_ksql_engine_query_stats_liveness_indicator{" + + env_label + + '="$env"}, ' + + ksqldb_cluster_label + + ")", + hide=True, + ), + ] + ) + + zk_panels = [ + G.RowPanel( + title="Zookeeper cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="ZK: Quorum Size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(zookeeper_status_quorumsize{" + env_label + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=2.0, 
color="yellow"), + G.Threshold(index=2, value=3.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), + ), + G.Stat( + title="ZK: Avg. number of ZNodes", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="avg(zookeeper_inmemorydatatree_nodecount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="ZK: Sum of number of Alive Connections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(zookeeper_numaliveconnections{" + env_label + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), ), - G.Template( - name="connect_app", - label="Kafka Connect cluster", - dataSource="Prometheus", - query='label_values(kafka_connect_connect_worker_metrics_connector_count{namespace="$ns"}, app)', - hide=2, + G.Stat( + title="ZK: Sum of watchers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(zookeeper_inmemorydatatree_watchcount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), ), - G.Template( - name="ksqldb_app", - label="ksqlDB cluster", - dataSource="Prometheus", - query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns"},app)', - hide=2, + G.TimeSeries( + title="ZK: Outstanding Requests", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="zookeeper_outstandingrequests{" + env_label + '="$env"}', + legendFormat="{{pod}} ({{server_id}}:{{member_type}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "last"], + legendPlacement="right", + 
gridPos=G.GridPos(h=default_height, w=8, x=stat_width * 4, y=0), ), ] -) -zk_panels = [ - G.RowPanel( - title="Zookeeper cluster", - gridPos=G.GridPos(h=1, w=24, x=0, y=0), - ), - G.Stat( - title="ZK: Quorum Size", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(zookeeper_status_quorumsize{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="red"), - G.Threshold(index=1, value=2.0, color="yellow"), - G.Threshold(index=2, value=3.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=0), - ), - G.Stat( - title="ZK: Avg. number of ZNodes", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='avg(zookeeper_inmemorydatatree_nodecount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=0), - ), - G.Stat( - title="ZK: Sum of number of Alive Connections", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(zookeeper_numaliveconnections{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=0), - ), - G.Stat( - title="ZK: Sum of watchers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(zookeeper_inmemorydatatree_watchcount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=0), - ), - G.TimeSeries( - title="ZK: Outstanding Requests", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='zookeeper_outstandingrequests{namespace="$ns"}', - legendFormat="{{pod}} ({{server_id}}:{{member_type}})", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "last"], - legendPlacement="right", - 
gridPos=G.GridPos(h=defaultHeight, w=8, x=statWidth * 4, y=0), - ), -] + kafka_panels = [ + G.RowPanel( + title="Kafka cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.Stat( + title="Kafka: Online Brokers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(kafka_server_replicamanager_leadercount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=1), + ), + G.Stat( + title="Kafka: Active Controller", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_controller_kafkacontroller_activecontrollercount{" + + env_label + + '="$env"} > 0', + legendFormat="{{" + server_label + "}}", + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_replicamanager_partitioncount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-Replicated (URP)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_replicamanager_underreplicatedpartitions{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-MinISR", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_cluster_partition_underminisr{" +
env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Offline", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_controller_kafkacontroller_offlinepartitionscount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=1), + ), + ] -kafka_panels = [ - G.RowPanel( - title="Kafka cluster", - gridPos=G.GridPos(h=1, w=24, x=0, y=1), - ), - G.Stat( - title="Kafka: Online Brokers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(kafka_server_replicamanager_leadercount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=1), - ), - G.Stat( - title="Kafka: Active Controller", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_controller_kafkacontroller_activecontrollercount{namespace="$ns"} > 0', - legendFormat="{{pod}}", - ), - ], - reduceCalc="last", - textMode="value_and_name", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_replicamanager_partitioncount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Under-Replicated 
(URP)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_replicamanager_underreplicatedpartitions{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Under-MinISR", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_cluster_partition_underminisr{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Offline", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=1), - ), -] + sr_panels = [ + G.RowPanel( + title="Schema Registry cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + ), + G.Stat( + title="SR: Online instances", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(kafka_schema_registry_registered_count{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=2.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=2), + ), + G.Stat( + title="SR: Sum of Registered Schemas", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="avg(kafka_schema_registry_registered_count{" + + env_label + + '="$env"})', + 
instant=True, + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=2), + ), + G.Stat( + title="SR: Sum of Created Schemas by Type", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="avg(kafka_schema_registry_schemas_created{" + + env_label + + '="$env"}) by (schema_type)', + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=2), + ), + G.Stat( + title="SR: Sum of Deleted Schemas by Type", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_schema_registry_schemas_deleted{" + + env_label + + '="$env"}) by (schema_type)', + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=2), + ), + ] -sr_panels = [ - G.RowPanel( - title="Schema Registry cluster", - gridPos=G.GridPos(h=1, w=24, x=0, y=2), - ), - G.Stat( - title="SR: Online instances", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(kafka_schema_registry_registered_count{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="red"), - G.Threshold(index=1, value=1.0, color="yellow"), - G.Threshold(index=2, value=2.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=2), - ), - G.Stat( - title="SR: Sum of Registered Schemas", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='avg(kafka_schema_registry_registered_count{namespace="$ns"})', - instant=True, - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=2), - ), - G.Stat( 
- title="SR: Sum of Created Schemas by Type", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='avg(kafka_schema_registry_schemas_created{namespace="$ns"}) by (schema_type)', - legendFormat="{{schema_type}}", - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=2), - ), - G.Stat( - title="SR: Sum of Deleted Schemas by Type", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_schema_registry_schemas_deleted{namespace="$ns"}) by (schema_type)', - legendFormat="{{schema_type}}", - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=2), - ), -] + connect_inner = [ + G.Stat( + title="Connect: Online Workers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(kafka_connect_connect_worker_metrics_connector_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=3), + ), + G.Stat( + title="Connect: Sum of Total Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=3), + ), + G.Stat( + title="Connect: Sum of Running Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_running_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '=~"$connect_cluster"})', + 
), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=3), + ), + G.Stat( + title="Connect: Sum of Paused Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=3), + ), + G.Stat( + title="Connect: Sum of Failed Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '=~"$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=3), + ), + G.Stat( + title="Connect: Time since last rebalance", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{" + + env_label + + '="$env",' + + connect_cluster_label + + '=~"$connect_cluster"} >= 0', + legendFormat="{{pod}}", + ), + ], + reduceCalc="last", + format="clockms", + graphMode="none", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=3), + ), + ] -connect_inner = [ - G.Stat( - title="Connect: Online Workers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(kafka_connect_connect_worker_metrics_connector_count{namespace="$ns",app=~"$connect_app"})', 
- ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=3), - ), - G.Stat( - title="Connect: Sum of Total Tasks", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=3), - ), - G.Stat( - title="Connect: Sum of Running Tasks", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=3), - ), - G.Stat( - title="Connect: Sum of Paused Tasks", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="yellow"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=3), - ), - G.Stat( - title="Connect: Sum of Failed Tasks", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace="$ns",app=~"$connect_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=3), - ), - G.Stat( - title="Connect: Time since last rebalance", - 
dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace="$ns",app=~"$connect_app"} >= 0', - legendFormat="{{pod}}", - ), - ], - reduceCalc="last", - format="clockms", - graphMode="none", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=3), - ), -] + connect_panels = [ + G.RowPanel( + title="Kafka Connect cluster: $connect_cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=3), + repeat=G.Repeat(variable="connect_cluster"), + collapsed=True, + panels=connect_inner, + ), + ] -connect_panels = [ - G.RowPanel( - title="Kafka Connect cluster: $connect_app", - gridPos=G.GridPos(h=1, w=24, x=0, y=3), - repeat=G.Repeat(variable="connect_app"), - collapsed=True, - panels=connect_inner, - ), -] + ksqldb_inner = [ + G.Stat( + title="ksqlDB: Online Servers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(ksql_ksql_engine_query_stats_num_active_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Active Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_num_active_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Running Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_running_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + 
'="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=4), + ), + G.Stat( + title="ksqlDB: Sum of Rebalancing Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(ksql_ksql_engine_query_stats_rebalancing_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=4), + ), + G.Stat( + title="Connect: Sum of Failed Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="avg(ksql_ksql_engine_query_stats_error_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=4), + ), + ] -ksqldb_inner = [ - G.Stat( - title="ksqlDB: Online Servers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=4), - ), - G.Stat( - title="ksqlDB: Sum of Active Queries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, 
y=4), - ), - G.Stat( - title="ksqlDB: Sum of Running Queries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(ksql_ksql_engine_query_stats_running_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=4), - ), - G.Stat( - title="ksqlDB: Sum of Rebalancing Queries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="yellow"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=4), - ), - G.Stat( - title="Connect: Sum of Failed Queries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='avg(ksql_ksql_engine_query_stats_error_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=4), - ), -] + ksqldb_panels = [ + G.RowPanel( + title="ksqlDB cluster: $ksqldb_cluster", + gridPos=G.GridPos(h=1, w=24, x=0, y=4), + repeat=G.Repeat(variable="ksqldb_cluster"), + collapsed=True, + panels=ksqldb_inner, + ), + ] -ksqldb_panels = [ - G.RowPanel( - title="ksqlDB cluster: $ksqldb_app", - gridPos=G.GridPos(h=1, w=24, x=0, y=4), - repeat=G.Repeat(variable="ksqldb_app"), - collapsed=True, - panels=ksqldb_inner, - ), -] + panels = zk_panels + kafka_panels + sr_panels + connect_panels + ksqldb_panels -panels = zk_panels + kafka_panels + sr_panels + connect_panels + ksqldb_panels + return G.Dashboard( + title="Confluent Platform overview - v2", + description="Overview of the 
main health-check metrics from Confluent Platform components.", + tags=[ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb", + ], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() -dashboard = G.Dashboard( - title="Confluent Platform overview - v2", - description="Overview of the main health-check metrics from Confluent Platform components.", - tags=[ - "confluent", - "kafka", - "zookeeper", - "kafka-connect", - "schema-registry", - "ksqldb", - ], - inputs=[ - G.DataSourceInput( - name="DS_PROMETHEUS", - label="Prometheus", - pluginId="prometheus", - pluginName="Prometheus", - ) - ], - templating=templating, - timezone="browser", - panels=panels, - refresh="30s", -).auto_panel_ids() + +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +connect_cluster_label = os.environ.get( + "CONNECT_CLUSTER_LABEL", "kafka_connect_cluster_id" +) +ksqldb_cluster_label = os.environ.get("KSQLDB_CLUSTER_LABEL", "ksqldb_cluster_id") +dashboard = dashboard( + env_label, server_label, connect_cluster_label, ksqldb_cluster_label +) diff --git a/grafana-dashboards/default/confluent-platform.json b/grafana-dashboards/default/confluent-platform.json new file mode 100644 index 00000000..28a286ae --- /dev/null +++ b/grafana-dashboards/default/confluent-platform.json @@ -0,0 +1,2640 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the main health-check metrics from Confluent Platform components.", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + 
"cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{env=\"$env\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Avg. 
number of ZNodes", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_numaliveconnections{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of number of Alive Connections", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 
12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "last" + ], + 
"displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_outstandingrequests{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + 
"options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{env=\"$env\"} > 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": 
"", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis":
"left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Under-Replicated (URP)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": 
"area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Under-MinISR", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, +
"legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Offline", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Schema Registry cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": 
"", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Registered Schemas", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + 
"cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Created Schemas by Type", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 2 + }, + "height": null, + "hideTimeOverride": 
false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Deleted Schemas by Type", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", 
+ "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + 
"value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, 
+ "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" + } + ], + "repeat": "connect_cluster", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka Connect cluster: $connect_cluster", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, 
+ "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online Servers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": 
false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Failed Queries", + "transformations": [], + "transparent": false, + "type": "stat" + } + ], + "repeat": "ksqldb_cluster", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB cluster: $ksqldb_cluster", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", +
"useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": true, + "includeAll": false, + "label": "Kafka Connect cluster", + "multi": false, + "name": "connect_cluster", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\"}, kafka_connect_cluster_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": true, + "includeAll": false, + "label": "ksqlDB cluster", + "multi": false, + "name": "ksqldb_cluster", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{env=\"$env\"}, ksqldb_cluster_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Confluent Platform overview - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/kafka-cluster.json b/grafana-dashboards/default/kafka-cluster.json new file mode 100644 index 00000000..25b5f484 --- /dev/null +++ b/grafana-dashboards/default/kafka-cluster.json @@ -0,0 +1,5537 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": 
"Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{env=\"$env\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{env=\"$env\"} > 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + 
"noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Replica Imbalance", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": 
"auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Topics", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Kafka: Rate of Requests/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "value_and_name" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_log_log_size{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Logs Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, 
+ "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Under-Replicated (URP)", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Kafka: Sum of Partitions Under-MinISR", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions Offline", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 
0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + 
"textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + 
"targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": "Bytes in/out per second", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, 
+ "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + 
"span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": "Internal thread pools usage", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network processor usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request processor (IO) usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread Utilization", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": "Sum of req/sec rates", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{request}}(v{{version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Requests rates", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + 
"h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{env=\"$env\",hostname=~\"$broker\",error!=\"NONE\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{error}}@{{request}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Error rates", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request rates", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections alive per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections creation rate per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": 
{ + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections close rate per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections alive per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": 
"bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections creation rate per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"sum(kafka_server_socketservermetrics_connection_close_rate{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections close rate per Listener", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + 
"w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of ISR Shrinks/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + 
"mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrexpandspersec{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rate of ISR Expands/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "In-Sync Replicas", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + 
"stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Local Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + 
"repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Producer", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": 
{ + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 48, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 49, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": 
false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Consumer Fetch", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 50, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 51, + "interval": null, + "links": [], + "maxDataPoints": 
100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "FetchFollower: Request Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 52, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, +
"tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 53, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 14 + }, + "height": null, + "hideTimeOverride": false, + "id": 54, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": 
false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "FetchFollower: Response Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 14 + }, + "height": null, + "hideTimeOverride": false, + "id": 55, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom":
null, + "timeShift": null, + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Replica Fetch", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 56, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 57, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + 
"tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Groups per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 15 + }, + "height": null, + "hideTimeOverride": false, + "id": 58, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "stable", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "preparing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "dead", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "completing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "empty", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Number of Groups per Broker", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + 
"title": "Group Coordinator", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 59, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "opsps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 60, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{env=\"$env\",hostname=~\"$broker\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Produce conversion rate per sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "opsps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 61, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": 
null, + "timeShift": null, + "title": "Sum of Fetch conversion rate per sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 16 + }, + "height": null, + "hideTimeOverride": false, + "id": 62, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connections{env=\"$env\",hostname=~\"$broker\"}) by (client_software_name,client_software_version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_software_name}} (v{{client_software_version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sum of Connections per version", + "transformations": [], + "transparent": false, + "type": "timeseries" 
+ } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Message Conversion", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Broker", + "multi": true, + "name": "broker", + "options": [], + "query": "label_values(kafka_server_replicamanager_leadercount{env=\"$env\"}, hostname)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{env=\"$env\"}, quantile)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { 
+ "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/kafka-connect-cluster.json b/grafana-dashboards/default/kafka-connect-cluster.json new file mode 100644 index 00000000..5b9b8544 --- /dev/null +++ b/grafana-dashboards/default/kafka-connect-cluster.json @@ -0,0 +1,4184 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka Connect cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": 
[ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",version!=\"\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + 
"fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 0 + }, + "height": null, + 
"hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",start_time_ms!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",version!=\"\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_startup_success_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (hostname) 
(kafka_connect_connect_worker_metrics_task_startup_success_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_startup_failure_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect Workers", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "hostname" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "hostname", + "kafka_connect_cluster_id 1", + "start_time_ms", + "version", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + "env 1" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "env 1": 0, + "hostname": 2, + "kafka_connect_cluster_id 1": 1, + "start_time_ms": 3, + "version": 4 + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. 
failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + "env 1": "environment", + "hostname": "worker", + "kafka_connect_cluster_id 1": "cluster", + "start_time_ms": "start time", + "version": "version" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connectors", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "connector" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + "Value #C", + "Value #D", + "Value #E" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" 
+ }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_running_ratio{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Tasks Running Ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_avg{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_max{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + 
}, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset commit success %", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + 
"hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Offset commit avg. latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_failures{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Record Failures", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_errors{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Record Error", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { 
+ "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_records_skipped{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Records Skipped", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { 
+ "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_errors_logged{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Errors Logged", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, 
+ "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_retries{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Total Retries", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + 
"tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Dead Letter Topic Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Task Errors", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Batch Avg. 
Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Batch Max. 
Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Poll Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": 
"${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Source Record Write Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Source Tasks", + "transformations": [], + "transparent": 
false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + 
"hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Batch Avg. Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + 
"step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Batch Max. Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_partition_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Partition Count", + "transformations": [], + "transparent": false, + 
"type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Sink Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, 
+ "expr": "kafka_connect_connect_metrics_incoming_byte_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_io_ratio{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, 
+ "title": "IO Ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_network_io_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Network IO Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": 
true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_connection_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Active Connections", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_successful_authentication_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_failed_authentication_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Authentications", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Connect Workers", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-connect" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Connect cluster", + "multi": false, + "name": "connect_cluster", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\"}, kafka_connect_cluster_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Connect worker", + "multi": true, + "name": "connect_worker", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}, hostname)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": 
"query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Connector", + "multi": true, + "name": "connector", + "options": [], + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}, connector)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Connect cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/kafka-topics.json b/grafana-dashboards/default/kafka-topics.json new file mode 100644 index 00000000..8c3b1527 --- /dev/null +++ b/grafana-dashboards/default/kafka-topics.json @@ -0,0 +1,1085 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka topics", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + 
"hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + 
"metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum(kafka_log_log_size{env=\"$env\",topic=~\"$topic\"}) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Log size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Requests/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Consumer Fetch Requests/Sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"custom": { + "align": "auto", + "displayMode": "auto", + "filterable": true + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 12, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_log_log_logstartoffset{env=\"$env\",topic=~\"$topic\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Start Offsets", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "app": true, + "clusterId": true, + "confluentPlatform": true, + "confluent_platform": true, + "controller_revision_hash": true, + "instance": true, + "job": true, + "namespace": true, + "platform_confluent_io_type": true, + "statefulset_kubernetes_io_pod_name": true, + "type": true + }, + "indexByName": { + "Value": 4, + "partition": 3, + "pod": 1, + "topic": 2 + }, + "renameByName": { + "Value": "offset" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "partition" + } + ], + "fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition" + } + ] + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": 
"${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": true + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 12, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_log_log_logendoffset{env=\"$env\",topic=~\"$topic\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "End Offsets", + "transformations": [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "app": true, + "clusterId": true, + "confluentPlatform": true, + "confluent_platform": true, + "controller_revision_hash": true, + "instance": true, + "job": true, + "namespace": true, + "platform_confluent_io_type": true, + "statefulset_kubernetes_io_pod_name": true, + "type": true + }, + "indexByName": { + "Value": 4, + "partition": 3, + "pod": 1, + "topic": 2 + }, + "renameByName": { + "Value": "offset" + } + } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "partition" + } + ], + "fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition" + } + ] + } + } + ], + "transparent": false, + "type": "table" + } + ], 
+ "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Offsets", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Topic", + "multi": true, + "name": "topic", + "options": [], + "query": "label_values(kafka_log_log_size{env=\"$env\"}, topic)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka topics - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/ksqldb-cluster.json b/grafana-dashboards/default/ksqldb-cluster.json new file mode 100644 index 00000000..aabb7b56 --- /dev/null +++ 
b/grafana-dashboards/default/ksqldb-cluster.json @@ -0,0 +1,3683 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of ksqlDB clusters.", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + 
"textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online Servers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + 
"transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + 
"value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + 
"options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_liveness_indicator{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Liveness", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Messages consumed/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Messages produced/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + 
"timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_poll_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_poll_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Poll Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_process_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Process Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_process_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Process Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_commit_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_commit_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_punctuate_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Punctuate Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_punctuate_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Punctuate Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Queries Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": 
"ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], 
+ "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + 
"placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": 
null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"kafka_streams_stream_state_metrics_fetch_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": 
"", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Rate", + "transformations": [], + "transparent": false, + "type": 
"timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": 
"line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ops" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 
1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Latency (Avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Restore Latency (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "State Stores", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "ksqldb" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + 
"sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "ksqlDB cluster", + "multi": false, + "name": "ksqldb_cluster", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\"},ksqldb_cluster_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 2, + "includeAll": false, + "label": "ksqlDB cluster ID", + "multi": false, + "name": "ksqldb_cluster_id", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\"},ksql_cluster)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "ksqlDB server", + "multi": true, + "name": "ksqldb_server", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}, hostname)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + 
"5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "ksqlDB cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/schema-registry-cluster.json b/grafana-dashboards/default/schema-registry-cluster.json new file mode 100644 index 00000000..b49fe69d --- /dev/null +++ b/grafana-dashboards/default/schema-registry-cluster.json @@ -0,0 +1,937 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Schema Registry cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": 
"yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + 
"reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Registered Schemas", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Created 
Schemas by Type", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Deleted Schemas by Type", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "SR: Sum of Active Connections", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + 
"gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$sr_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$sr_server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + 
"y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$sr_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "schema-registry" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "sr_server", + "options": [], + "query": "label_values(kafka_schema_registry_registered_count{env=\"$env\"}, hostname)", + "refresh": 1, 
+ "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Schema Registry cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/zookeeper-cluster.json b/grafana-dashboards/default/zookeeper-cluster.json new file mode 100644 index 00000000..b1982b8d --- /dev/null +++ b/grafana-dashboards/default/zookeeper-cluster.json @@ -0,0 +1,1873 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Zookeeper cluster", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" + }, + { + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + 
"hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Avg. number of ZNodes", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_numaliveconnections{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, 
+ "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of number of Alive Connections", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "last" + ], + "displayMode": "table", + "placement": "right" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_outstandingrequests{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": 
null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$zk_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$zk_server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$zk_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_minrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Minimum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_avgrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Average)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_maxrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Maximum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Server Latency", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + 
"barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{env=\"$env\",quantile=~\"$quantile\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sync Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Expired Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": 
[], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Disconnected Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": 
"single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Auth Failures on Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Client Latency (Kafka)", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka", + "zookeeper" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "zk_server", + "options": [], + "query": "label_values(zookeeper_outstandingrequests{env=\"$env\"}, hostname)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": 
"query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{env=\"$env\"}, quantile)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Zookeeper cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index 8c87fef5..e94c011f 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -1,941 +1,1228 @@ +import os import grafanalib.core as G -hcHeight = 5 -statWidth = 4 -tsWidth = 8 -templating = G.Templating( - list=[ - G.Template( - name="ns", - label="Namespace", - dataSource="Prometheus", - query="label_values(namespace)", - ), - G.Template( - name="broker", - label="Broker", - dataSource="Prometheus", - query='label_values(kafka_server_replicamanager_leadercount{namespace="$ns"}, pod)', - multi=True, - includeAll=True, - ), - G.Template( - name="quantile", - label="Quantile", - dataSource="Prometheus", - query='label_values(kafka_network_requestmetrics_requestqueuetimems{namespace="$ns"}, quantile)', - ), - ] -) +def dashboard(env_label="namespace", server_label="pod"): + default_height = 5 + stat_width = 4 + ts_width = 8 -healthcheck_panels = [ - 
G.RowPanel( - title="Health-check", - gridPos=G.GridPos(h=1, w=24, x=0, y=0), - ), - G.Stat( - title="Kafka: Online Brokers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(kafka_server_replicamanager_leadercount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 0, y=0), - ), - G.Stat( - title="Kafka: Active Controller", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_controller_kafkacontroller_activecontrollercount{namespace="$ns"} > 0', - legendFormat="{{pod}}", - ), - ], - reduceCalc="last", - textMode="value_and_name", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 1, y=0), - ), - G.Stat( - title="Kafka: Sum of Replica Imbalance", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 2, y=0), - ), - G.Stat( - title="Kafka: Sum of Topics", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_controller_kafkacontroller_globaltopiccount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 3, y=1), - ), - G.Stat( - title="Kafka: Rate of Requests/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(rate(kafka_network_requestmetrics_requestspersec{namespace="$ns",pod=~"$broker"}[5m]))', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - format="reqps", - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=0), - ), - G.Stat( - 
title="Kafka: Logs Size", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_log_log_size{namespace="$ns",pod=~"$broker"}) by (pod)', - legendFormat="{{pod}}", - ), - ], - reduceCalc="last", - textMode="value_and_name", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - format="bytes", - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 5, y=0), - ), - G.Stat( - title="Kafka: Sum of Partitions", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_replicamanager_partitioncount{namespace="$ns",pod=~"$broker"})', + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values(" + env_label + ")", ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 0, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Under-Replicated (URP)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_replicamanager_underreplicatedpartitions{namespace="$ns",pod=~"$broker"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 1, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Under-MinISR", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_cluster_partition_underminisr{namespace="$ns",pod=~"$broker"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 2, y=1), - ), - G.Stat( - title="Kafka: Sum of Partitions Offline", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - 
expr='sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace="$ns",pod=~"$broker"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="green"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 3, y=1), - ), - G.Stat( - title="Kafka: Bytes In/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', + G.Template( + name="broker", + label="Broker", + dataSource="Prometheus", + query="label_values(kafka_server_replicamanager_leadercount{" + + env_label + + '="$env"}, ' + + server_label + + ")", + multi=True, + includeAll=True, ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - format="binBps", - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 4, y=1), - ), - G.Stat( - title="Kafka: Bytes Out/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace="$ns",pod=~"$broker"}[5m]))', + G.Template( + name="quantile", + label="Quantile", + dataSource="Prometheus", + query="label_values(kafka_network_requestmetrics_requestqueuetimems{" + + env_label + + '="$env"}, quantile)', ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - format="binBps", - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 5, y=1), - ), -] + ] + ) + + healthcheck_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="Kafka: Online Brokers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(kafka_server_replicamanager_leadercount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, 
x=stat_width * 0, y=0), + ), + G.Stat( + title="Kafka: Active Controller", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_controller_kafkacontroller_activecontrollercount{" + + env_label + + '="$env"} > 0', + legendFormat="{{" + server_label + "}}", + ), + ], + reduceCalc="last", + textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="Kafka: Sum of Replica Imbalance", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), + ), + G.Stat( + title="Kafka: Sum of Topics", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_controller_kafkacontroller_globaltopiccount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=1), + ), + G.Stat( + title="Kafka: Rate of Requests/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(rate(kafka_network_requestmetrics_requestspersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m]))', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="reqps", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=0), + ), + G.Stat( + title="Kafka: Logs Size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_log_log_size{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}) by (' + + server_label + + ")", + legendFormat="{{" + server_label + "}}", + ), + ], + reduceCalc="last", + 
textMode="value_and_name", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="bytes", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=0), + ), + G.Stat( + title="Kafka: Sum of Partitions", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_replicamanager_partitioncount{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-Replicated (URP)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_replicamanager_underreplicatedpartitions{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Under-MinISR", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_cluster_partition_underminisr{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=1), + ), + G.Stat( + title="Kafka: Sum of Partitions Offline", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_controller_kafkacontroller_offlinepartitionscount{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, 
w=stat_width, x=stat_width * 3, y=1), + ), + G.Stat( + title="Kafka: Bytes In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m]))', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="binBps", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=1), + ), + G.Stat( + title="Kafka: Bytes Out/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m]))', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + format="binBps", + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=1), + ), + ] -system_base = 2 + system_base = 2 -system_panels = [ - G.RowPanel( - title="System", - gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), - ), - G.TimeSeries( - title="CPU usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='irate(process_cpu_seconds_total{namespace="$ns",type="kafka"}[5m])', - legendFormat="{{pod}}", + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="irate(process_cpu_seconds_total{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m])', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=system_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=system_base), - ), - 
G.TimeSeries( - title="Memory usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(area)(jvm_memory_bytes_used{namespace="$ns",type="kafka"})', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(area)(jvm_memory_bytes_used{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=system_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=system_base), - ), - G.TimeSeries( - title="GC collection", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",type="kafka"}[5m]))', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m]))', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=system_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=system_base), - ), -] + ), + ] -throughtput_base = system_base + 1 -throughput_inner = [ - G.TimeSeries( - title="Messages In/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', - 
legendFormat="{{pod}}", + throughtput_base = system_base + 1 + throughput_inner = [ + G.TimeSeries( + title="Messages In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m]))', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=throughtput_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="cps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=throughtput_base), - ), - G.TimeSeries( - title="Bytes In/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace="$ns",pod=~"$broker"}[5m]))', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Bytes In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m]))', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=throughtput_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="binBps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=throughtput_base), - ), - G.TimeSeries( - title="Bytes Out/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace="$ns",pod=~"$broker"}[5m]))', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Bytes Out/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( 
+ expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m]))', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=throughtput_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="binBps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=throughtput_base), - ), -] -throughput_panels = [ - G.RowPanel( - title="Throughput", - description="Bytes in/out per second", - gridPos=G.GridPos(h=1, w=24, x=0, y=throughtput_base), - collapsed=True, - panels=throughput_inner, - ), -] + ), + ] + throughput_panels = [ + G.RowPanel( + title="Throughput", + description="Bytes in/out per second", + gridPos=G.GridPos(h=1, w=24, x=0, y=throughtput_base), + collapsed=True, + panels=throughput_inner, + ), + ] -thread_base = throughtput_base + 1 -thread_inner = [ - G.TimeSeries( - title="Network processor usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace="$ns",pod=~"$broker"}', - legendFormat="{{pod}}", + thread_base = throughtput_base + 1 + thread_inner = [ + G.TimeSeries( + title="Network processor usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="1-kafka_network_socketserver_networkprocessoravgidlepercent{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=thread_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=thread_base), - ), 
- G.TimeSeries( - title="Request processor (IO) usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace="$ns",pod=~"$broker"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Request processor (IO) usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=thread_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=thread_base), - ), -] -thread_panels = [ - G.RowPanel( - title="Thread Utilization", - description="Internal thread pools usage", - gridPos=G.GridPos(h=1, w=24, x=0, y=thread_base), - collapsed=True, - panels=thread_inner, - ), -] + ), + ] + thread_panels = [ + G.RowPanel( + title="Thread Utilization", + description="Internal thread pools usage", + gridPos=G.GridPos(h=1, w=24, x=0, y=thread_base), + collapsed=True, + panels=thread_inner, + ), + ] -request_base = thread_base + 1 -request_inner = [ - G.TimeSeries( - title="Requests rates", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace="$ns",pod=~"$broker"}[5m]))', - legendFormat="{{request}}(v{{version}})", + request_base = thread_base + 1 + request_inner = [ + G.TimeSeries( + title="Requests rates", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{" + + 
env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m]))', + legendFormat="{{request}}(v{{version}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=request_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="reqps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=request_base), - stacking={"mode": "normal", "group": "A"}, - ), - G.TimeSeries( - title="Error rates", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace="$ns",pod=~"$broker",error!="NONE"}[5m]))', - legendFormat="{{error}}@{{request}}", + stacking={"mode": "normal", "group": "A"}, + ), + G.TimeSeries( + title="Error rates", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",error!="NONE"}[5m]))', + legendFormat="{{error}}@{{request}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=request_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="reqps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=request_base), - stacking={"mode": "normal", "group": "A"}, - ), -] -request_panels = [ - G.RowPanel( - title="Request rates", - description="Sum of req/sec rates", - gridPos=G.GridPos(h=1, w=24, x=0, y=request_base), - collapsed=True, - panels=request_inner, - ), -] - + stacking={"mode": "normal", "group": "A"}, + ), + ] + request_panels = [ + G.RowPanel( + title="Request rates", + description="Sum of req/sec rates", + gridPos=G.GridPos(h=1, 
w=24, x=0, y=request_base), + collapsed=True, + panels=request_inner, + ), + ] -connection_base = request_base + 1 -connection_inner = [ - G.TimeSeries( - title="Sum of Connections alive per Broker", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_socketservermetrics_connection_count{namespace="$ns",pod=~"$broker"}) by (pod)', - legendFormat="{{pod}}", + connection_base = request_base + 1 + connection_inner = [ + G.TimeSeries( + title="Sum of Connections alive per Broker", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_count{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}) by (' + + server_label + + ")", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=connection_base), - ), - G.TimeSeries( - title="Sum of Connections creation rate per Broker", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_socketservermetrics_connection_creation_rate{namespace="$ns",pod=~"$broker"}) by (pod)', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Sum of Connections creation rate per Broker", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_creation_rate{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}) by (' + + server_label + + ")", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, 
w=tsWidth, x=tsWidth * 1, y=connection_base), - ), - G.TimeSeries( - title="Sum of Connections close rate per Broker", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_socketservermetrics_connection_close_rate{namespace="$ns",pod=~"$broker"}) by (pod)', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Sum of Connections close rate per Broker", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_close_rate{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}) by (' + + server_label + + ")", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=connection_base), - ), - # By Listener - G.TimeSeries( - title="Sum of Connections alive per Listener", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_socketservermetrics_connection_count{namespace="$ns",pod=~"$broker"}) by (listener)', - legendFormat="{{listener}}", + ), + # By Listener + G.TimeSeries( + title="Sum of Connections alive per Listener", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_count{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}) by (listener)', + legendFormat="{{listener}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=connection_base + 1 - ), - ), - G.TimeSeries( - title="Sum of Connections creation rate per 
Listener", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_socketservermetrics_connection_creation_rate{namespace="$ns",pod=~"$broker"}) by (listener)', - legendFormat="{{listener}}", + ), + G.TimeSeries( + title="Sum of Connections creation rate per Listener", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_creation_rate{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}) by (listener)', + legendFormat="{{listener}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=connection_base + 1 - ), - ), - G.TimeSeries( - title="Sum of Connections close rate per Listener", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_socketservermetrics_connection_close_rate{namespace="$ns",pod=~"$broker"}) by (listener)', - legendFormat="{{listener}}", + ), + G.TimeSeries( + title="Sum of Connections close rate per Listener", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connection_close_rate{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}) by (listener)', + legendFormat="{{listener}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=connection_base + 1 - ), - ), -] -connection_panels = [ - G.RowPanel( - title="Connections", - gridPos=G.GridPos(h=1, w=24, x=0, y=connection_base), - collapsed=True, - panels=connection_inner, - ), -] + ), + ] + 
connection_panels = [ + G.RowPanel( + title="Connections", + gridPos=G.GridPos(h=1, w=24, x=0, y=connection_base), + collapsed=True, + panels=connection_inner, + ), + ] -isr_base = connection_base + 2 -isr_inner = [ - G.TimeSeries( - title="Rate of ISR Shrinks/sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='rate(kafka_server_replicamanager_isrshrinkspersec{namespace="$ns",pod=~"$broker"}[5m])', - legendFormat="{{pod}}", + isr_base = connection_base + 2 + isr_inner = [ + G.TimeSeries( + title="Rate of ISR Shrinks/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="rate(kafka_server_replicamanager_isrshrinkspersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m])', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=isr_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=isr_base), - ), - G.TimeSeries( - title="Rate of ISR Expands/sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='rate(kafka_server_replicamanager_isrexpandspersec{namespace="$ns",pod=~"$broker"}[5m])', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Rate of ISR Expands/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="rate(kafka_server_replicamanager_isrexpandspersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}[5m])', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=isr_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=isr_base), - ), -] -isr_panels = [ - G.RowPanel( - title="In-Sync Replicas", - 
gridPos=G.GridPos(h=1, w=24, x=0, y=isr_base), - collapsed=True, - panels=isr_inner, - ), -] + ), + ] + isr_panels = [ + G.RowPanel( + title="In-Sync Replicas", + gridPos=G.GridPos(h=1, w=24, x=0, y=isr_base), + collapsed=True, + panels=isr_inner, + ), + ] -producer_base = isr_base + 1 -producer_inner = [ - G.TimeSeries( - title="Produce: Request Queue Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_requestqueuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', - legendFormat="{{pod}} ({{quantile}}th)", + producer_base = isr_base + 1 + producer_inner = [ + G.TimeSeries( + title="Produce: Request Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_requestqueuetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=producer_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=producer_base), - ), - G.TimeSeries( - title="Produce: Local Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_localtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Produce: Local Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_localtimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", 
"last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=producer_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=producer_base), - ), - G.TimeSeries( - title="Produce: Remote Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_remotetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Produce: Remote Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_remotetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=producer_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=producer_base), - ), - G.TimeSeries( - title="Produce: Response Queue Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_responsequeuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Produce: Response Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsequeuetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height 
* 2, w=ts_width, x=ts_width * 0, y=producer_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=producer_base + 1 - ), - ), - G.TimeSeries( - title="Produce: Response Send Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_responsesendtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Produce"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Produce: Response Send Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsesendtimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Produce"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=producer_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=producer_base + 1 - ), - ), -] -producer_panels = [ - G.RowPanel( - title="Request latency: Producer", - gridPos=G.GridPos(h=1, w=24, x=0, y=producer_base), - collapsed=True, - panels=producer_inner, - ), -] + ), + ] + producer_panels = [ + G.RowPanel( + title="Request latency: Producer", + gridPos=G.GridPos(h=1, w=24, x=0, y=producer_base), + collapsed=True, + panels=producer_inner, + ), + ] -consumer_base = producer_base + 2 -consumer_inner = [ - G.TimeSeries( - title="Fetch: Request Queue Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_requestqueuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', - legendFormat="{{pod}} ({{quantile}}th)", + consumer_base = producer_base + 2 + consumer_inner = [ + 
G.TimeSeries( + title="Fetch: Request Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_requestqueuetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=consumer_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=consumer_base), - ), - G.TimeSeries( - title="Fetch: Local Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_localtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Fetch: Local Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_localtimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=consumer_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=consumer_base), - ), - G.TimeSeries( - title="Fetch: Remote Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_remotetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Fetch: Remote Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + 
G.Target( + expr="kafka_network_requestmetrics_remotetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=consumer_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=consumer_base), - ), - G.TimeSeries( - title="Fetch: Response Queue Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_responsequeuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Fetch: Response Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsequeuetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=consumer_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=consumer_base + 1 - ), - ), - G.TimeSeries( - title="Fetch: Response Send Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_responsesendtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="Fetch"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Fetch: Response Send Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr="kafka_network_requestmetrics_responsesendtimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=consumer_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=consumer_base + 1 - ), - ), -] -consumer_panels = [ - G.RowPanel( - title="Request latency: Consumer Fetch", - gridPos=G.GridPos(h=1, w=24, x=0, y=consumer_base), - collapsed=True, - panels=consumer_inner, - ), -] + ), + ] + consumer_panels = [ + G.RowPanel( + title="Request latency: Consumer Fetch", + gridPos=G.GridPos(h=1, w=24, x=0, y=consumer_base), + collapsed=True, + panels=consumer_inner, + ), + ] -replication_base = consumer_base + 2 -replication_inner = [ - G.TimeSeries( - title="Fetch: Request Queue Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_requestqueuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', - legendFormat="{{pod}} ({{quantile}}th)", + replication_base = consumer_base + 2 + replication_inner = [ + G.TimeSeries( + title="Fetch: Request Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_requestqueuetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=replication_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - 
gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=replication_base), - ), - G.TimeSeries( - title="Fetch: Local Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_localtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Fetch: Local Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_localtimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=replication_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=replication_base), - ), - G.TimeSeries( - title="Fetch: Remote Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_remotetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Fetch: Remote Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_remotetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=replication_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, 
y=replication_base), - ), - G.TimeSeries( - title="Fetch: Response Queue Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_responsequeuetimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Fetch: Response Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsequeuetimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=replication_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=replication_base + 1 - ), - ), - G.TimeSeries( - title="Fetch: Response Send Time", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_network_requestmetrics_responsesendtimems{namespace="$ns",pod=~"$broker",quantile=~"$quantile",request="FetchFollower"}', - legendFormat="{{pod}} ({{quantile}}th)", + ), + G.TimeSeries( + title="Fetch: Response Send Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_network_requestmetrics_responsesendtimems{" + + env_label + + '="$env",' + + server_label + + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + legendFormat="{{" + server_label + "}} ({{quantile}}th)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=replication_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=hcHeight * 2, w=tsWidth, 
x=tsWidth * 1, y=replication_base + 1 - ), - ), -] -replication_panels = [ - G.RowPanel( - title="Request latency: Replica Fetch", - gridPos=G.GridPos(h=1, w=24, x=0, y=replication_base), - collapsed=True, - panels=replication_inner, - ), -] + ), + ] + replication_panels = [ + G.RowPanel( + title="Request latency: Replica Fetch", + gridPos=G.GridPos(h=1, w=24, x=0, y=replication_base), + collapsed=True, + panels=replication_inner, + ), + ] -group_base = replication_base + 2 -group_inner = [ - G.TimeSeries( - title="Number of Groups per Broker", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_coordinator_group_groupmetadatamanager_numgroups{namespace="$ns",pod=~"$broker"}', - legendFormat="{{pod}}", + group_base = replication_base + 2 + group_inner = [ + G.TimeSeries( + title="Number of Groups per Broker", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_coordinator_group_groupmetadatamanager_numgroups{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=group_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=group_base), - ), - G.TimeSeries( - title="Number of Groups per Broker", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{namespace="$ns",pod=~"$broker"})', - legendFormat="stable", - ), - G.Target( - expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{namespace="$ns",pod=~"$broker"})', - legendFormat="preparing_rebalance", - ), - G.Target( - expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{namespace="$ns",pod=~"$broker"})', - legendFormat="dead", - ), - G.Target( - 
expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{namespace="$ns",pod=~"$broker"})', - legendFormat="completing_rebalance", - ), - G.Target( - expr='sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{namespace="$ns",pod=~"$broker"})', - legendFormat="empty", + ), + G.TimeSeries( + title="Number of Groups per Broker", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + legendFormat="stable", + ), + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + legendFormat="preparing_rebalance", + ), + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + legendFormat="dead", + ), + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + legendFormat="completing_rebalance", + ), + G.Target( + expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + legendFormat="empty", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=group_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - stacking={"mode": "normal"}, - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=group_base), - ), -] -group_panels = [ - G.RowPanel( - title="Group Coordinator", - gridPos=G.GridPos(h=1, w=24, x=0, y=group_base), - collapsed=True, - panels=group_inner, - ), -] + ), + ] + group_panels = [ + G.RowPanel( + title="Group Coordinator", + 
gridPos=G.GridPos(h=1, w=24, x=0, y=group_base), + collapsed=True, + panels=group_inner, + ), + ] -conversion_base = group_base + 1 -conversion_inner = [ - G.TimeSeries( - title="Sum of Produce conversion rate per sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace="$ns",pod=~"$broker"})', - legendFormat="{{pod}}", + conversion_base = group_base + 1 + conversion_inner = [ + G.TimeSeries( + title="Sum of Produce conversion rate per sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="opsps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=conversion_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="opsps", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=conversion_base), - ), - G.TimeSeries( - title="Sum of Fetch conversion rate per sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace="$ns",pod=~"$broker"})', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Sum of Fetch conversion rate per sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"})', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="opsps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=conversion_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="opsps", - 
gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=conversion_base), - ), - G.TimeSeries( - title="Sum of Connections per version", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_server_socketservermetrics_connections{namespace="$ns",pod=~"$broker"}) by (client_software_name,client_software_version)', - legendFormat="{{client_software_name}} (v{{client_software_version}})", + ), + G.TimeSeries( + title="Sum of Connections per version", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_server_socketservermetrics_connections{" + + env_label + + '="$env",' + + server_label + + '=~"$broker"}) by (client_software_name,client_software_version)', + legendFormat="{{client_software_name}} (v{{client_software_version}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=conversion_base ), + ), + ] + conversion_panels = [ + G.RowPanel( + title="Message Conversion", + gridPos=G.GridPos(h=1, w=24, x=0, y=conversion_base), + collapsed=True, + panels=conversion_inner, + ), + ] + + panels = ( + healthcheck_panels + + system_panels + + throughput_panels + + thread_panels + + request_panels + + connection_panels + + isr_panels + + producer_panels + + consumer_panels + + replication_panels + + group_panels + + conversion_panels + ) + + return G.Dashboard( + title="Kafka cluster - v2", + description="Overview of the Kafka cluster", + tags=["confluent", "kafka"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=conversion_base), - ), -] -conversion_panels = [ - G.RowPanel( - title="Message Conversion", - gridPos=G.GridPos(h=1, w=24, x=0, y=conversion_base), - collapsed=True, - panels=conversion_inner, - 
), -] + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() -panels = ( - healthcheck_panels - + system_panels - + throughput_panels - + thread_panels - + request_panels - + connection_panels - + isr_panels - + producer_panels - + consumer_panels - + replication_panels - + group_panels - + conversion_panels -) -dashboard = G.Dashboard( - title="Kafka cluster - v2", - description="Overview of the Kafka cluster", - tags=["confluent", "kafka"], - inputs=[ - G.DataSourceInput( - name="DS_PROMETHEUS", - label="Prometheus", - pluginId="prometheus", - pluginName="Prometheus", - ) - ], - templating=templating, - timezone="browser", - panels=panels, - refresh="30s", -).auto_panel_ids() +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +dashboard = dashboard(env_label, server_label) diff --git a/grafana-dashboards/kafka-connect-cluster.py b/grafana-dashboards/kafka-connect-cluster.py index 9887c4af..0ff01bd2 100644 --- a/grafana-dashboards/kafka-connect-cluster.py +++ b/grafana-dashboards/kafka-connect-cluster.py @@ -1,823 +1,1127 @@ +import os import grafanalib.core as G -defaultHeight = 5 -statWidth = 4 -tsWidth = 8 -templating = G.Templating( - list=[ - G.Template( - name="ns", - label="Namespace", - dataSource="Prometheus", - query="label_values(namespace)", - ), - G.Template( - name="connect_app", - label="Connect cluster", - dataSource="Prometheus", - query='label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns"}, app)', - ), - G.Template( - name="connect_worker", - label="Connect worker", - dataSource="Prometheus", - query='label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app="$connect_app"}, pod)', - multi=True, - includeAll=True, - ), - G.Template( - name="connector", - label="Connector", - dataSource="Prometheus", - 
query='label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app="$connect_app"}, connector)', - multi=True, - includeAll=True, - ), - ] -) +def dashboard( + env_label="namespace", + server_label="pod", + connect_cluster_label="app", +): + default_height = 5 + stat_width = 4 + ts_width = 8 -hc_base = 0 -hc_panels = [ - G.RowPanel( - title="Health-check", - gridPos=G.GridPos(h=1, w=24, x=0, y=hc_base), - ), - G.Stat( - title="Connect: Online Workers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(kafka_connect_app_info{namespace="$ns",app="$connect_app",version!=""})', - legendFormat="{{version}}", + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values(" + env_label + ")", ), - ], - reduceCalc="last", - textMode="value_and_name", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=hc_base), - ), - G.Stat( - title="Connect: Sum of Total Tasks", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app="$connect_app"})', + G.Template( + name="connect_cluster", + label="Connect cluster", + dataSource="Prometheus", + query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + env_label + + '="$env"}, ' + + connect_cluster_label + + ")", ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=hc_base), - ), - G.Stat( - title="Connect: Sum of Running Tasks", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace="$ns",app="$connect_app"})', + G.Template( + name="connect_worker", + label="Connect worker", +
dataSource="Prometheus", + query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"}, ' + + server_label + + ")", + multi=True, + includeAll=True, ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=hc_base), - ), - G.Stat( - title="Connect: Sum of Paused Tasks", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace="$ns",app="$connect_app"})', + G.Template( + name="connector", + label="Connector", + dataSource="Prometheus", + query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"}, connector)', + multi=True, + includeAll=True, ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="yellow"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=hc_base), - ), - G.Stat( - title="Connect: Sum of Failed Tasks", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace="$ns",app="$connect_app"})', + ] + ) + + hc_base = 0 + hc_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=hc_base), + ), + G.Stat( + title="Connect: Online Workers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(kafka_connect_app_info{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",version!=""})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 
0, y=hc_base ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=hc_base), - ), - G.Stat( - title="Connect: Time since last rebalance", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace="$ns",app="$connect_app"} >= 0', - legendFormat="{{pod}}", + ), + G.Stat( + title="Connect: Sum of Total Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_total_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 1, y=hc_base ), - ], - reduceCalc="last", - format="clockms", - graphMode="none", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 5, y=hc_base), - ), - G.Table( - title="Connect Workers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_app_info{namespace="$ns",app="$connect_app",start_time_ms!=""}', - format="table", - instant=True, - ), - G.Target( - expr='kafka_connect_app_info{namespace="$ns",app="$connect_app",version!=""}', - format="table", - instant=True, - ), - G.Target( - expr='sum by (pod) (kafka_connect_connect_worker_metrics_connector_count{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - G.Target( - expr='sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - G.Target( - expr='sum by (pod) 
(kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - G.Target( - expr='sum by (pod) (kafka_connect_connect_worker_metrics_task_count{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - G.Target( - expr='sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - G.Target( - expr='sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, + ), + G.Stat( + title="Connect: Sum of Running Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_running_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 2, y=hc_base ), - ], - transformations=[ - {"id": "seriesToColumns", "options": {"byField": "pod"}}, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "pod", - "app 1", - "start_time_ms", - "version", - "Value #C", - "Value #D", - "Value #E", - "Value #F", - "Value #G", - "Value #H", - "namespace 1", - ] - } - }, - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": { - "app 1": 1, - "namespace 1": 0, - "pod": 2, - "start_time_ms": 3, - "version": 4, - }, - "renameByName": { - "Value #C": "connectors", - "Value #D": "conn. success", - "Value #E": "conn. 
failure", - "Value #F": "tasks", - "Value #G": "tasks success", - "Value #H": "tasks failure", - "app 1": "cluster", - "namespace 1": "namespace", - "pod": "worker", - "start_time_ms": "start time", - "version": "version", + ), + G.Stat( + title="Connect: Sum of Paused Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 3, y=hc_base + ), + ), + G.Stat( + title="Connect: Sum of Failed Tasks", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 4, y=hc_base + ), + ), + G.Stat( + title="Connect: Time since last rebalance", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"} >= 0', + legendFormat="{{" + server_label + "}}", + ), + ], + reduceCalc="last", + format="clockms", + graphMode="none", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 5, y=hc_base + ), + ), + G.Table( + title="Connect Workers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_app_info{" + + env_label + + '="$env",' + + connect_cluster_label 
+ + '="$connect_cluster",start_time_ms!=""}', + format="table", + instant=True, + ), + G.Target( + expr="kafka_connect_app_info{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",version!=""}', + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_connector_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_connector_startup_success_total{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_connector_startup_failure_total{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_task_startup_success_total{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + G.Target( + expr="sum by (" + + server_label + + ") (kafka_connect_connect_worker_metrics_task_startup_failure_total{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + ], + transformations=[ + {"id": "seriesToColumns", "options": {"byField": server_label}}, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + server_label, + connect_cluster_label + " 1", + "start_time_ms", + "version", + "Value #C", + "Value #D", + "Value 
#E", + "Value #F", + "Value #G", + "Value #H", + env_label + " 1", + ] + } }, }, - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - {"destinationType": "number", "targetField": "start_time_ms"} - ], - "fields": {}, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + connect_cluster_label + " 1": 1, + env_label + " 1": 0, + server_label: 2, + "start_time_ms": 3, + "version": 4, + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + connect_cluster_label + " 1": "cluster", + env_label + " 1": "environment", + server_label: "worker", + "start_time_ms": "start time", + "version": "version", + }, + }, }, - }, - ], - gridPos=G.GridPos(h=defaultHeight, w=24, x=0, y=hc_base + 1), - ), - G.Table( - title="Connectors", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connector_info{namespace="$ns",app="$connect_app"}', - format="table", - instant=True, - ), - G.Target( - expr='sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - G.Target( - expr='sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - G.Target( - expr='sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - G.Target( - expr='sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace="$ns",app="$connect_app"})', - format="table", - instant=True, - ), - ], - transformations=[ - {"id": "seriesToColumns", "options": {"byField": "connector"}}, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - 
"connector", - "Value #B", - "Value #C", - "Value #D", - "Value #E", - ] - } + ], + gridPos=G.GridPos(h=default_height, w=24, x=0, y=hc_base + 1), + ), + G.Table( + title="Connectors", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connector_info{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"}', + format="table", + instant=True, + ), + G.Target( + expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + G.Target( + expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + G.Target( + expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + G.Target( + expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"})', + format="table", + instant=True, + ), + ], + transformations=[ + {"id": "seriesToColumns", "options": {"byField": "connector"}}, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + "Value #C", + "Value #D", + "Value #E", + ] + } + }, }, - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "renameByName": { - "Value #B": "tasks", - "Value #C": "running", - "Value #D": "failed", - "Value #E": "paused", + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused", + }, }, }, - }, - ], - gridPos=G.GridPos(h=defaultHeight, 
w=24, x=0, y=hc_base + 2), - ), - - G.TimeSeries( - title="Tasks Running Ratio", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connector_task_metrics_running_ratio{namespace="$ns",app="$connect_app"}', - legendFormat="{{connector}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=defaultHeight * 2, w=12, x=0, y=hc_base + 3), - ), - G.TimeSeries( - title="Rebalance Latency", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace="$ns",app="$connect_app"}', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=12, x=12, y=hc_base + 3), - ), -] + ], + gridPos=G.GridPos(h=default_height, w=24, x=0, y=hc_base + 2), + ), + G.TimeSeries( + title="Tasks Running Ratio", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connector_task_metrics_running_ratio{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"}', + legendFormat="{{connector}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=12, x=0, y=hc_base + 3), + ), + G.TimeSeries( + title="Rebalance Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=12, x=12, y=hc_base + 3), + ), + ] -system_base = hc_base + 4 -system_panels = [ - G.RowPanel( - title="System", - gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), - ), - 
G.TimeSeries( - title="CPU usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='irate(process_cpu_seconds_total{namespace="$ns",app="$connect_app",type="connect"}[5m])', - legendFormat="{{pod}}", + system_base = hc_base + 4 + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="irate(process_cpu_seconds_total{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}[5m])', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=system_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=system_base), - ), - G.TimeSeries( - title="Memory usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(area)(jvm_memory_bytes_used{namespace="$ns",app="$connect_app",type="connect"})', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(area)(jvm_memory_bytes_used{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"})', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=system_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=system_base), - ), - G.TimeSeries( - title="GC collection", - 
dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",app="$connect_app",type="connect"}[5m]))', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}[5m]))', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=system_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=system_base), - ), -] + ), + ] -worker_base = system_base + 1 -worker_inner = [ - G.TimeSeries( - title="Incoming Byte Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connect_metrics_incoming_byte_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', - legendFormat="{{pod}}", + worker_base = system_base + 1 + worker_inner = [ + G.TimeSeries( + title="Incoming Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_incoming_byte_rate{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=worker_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="binBps", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=worker_base), - ), - G.TimeSeries( - 
title="Outgoing Byte Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connect_metrics_outgoing_byte_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Outgoing Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_outgoing_byte_rate{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=worker_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="binBps", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=worker_base), - ), - G.TimeSeries( - title="IO Ratio", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connect_metrics_io_ratio{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="IO Ratio", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_io_ratio{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=worker_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=worker_base + 1 - ), - ), - G.TimeSeries( - title="Network IO Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - 
expr='kafka_connect_connect_metrics_network_io_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Network IO Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_network_io_rate{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=worker_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="binBps", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=worker_base + 1 - ), - ), - G.TimeSeries( - title="Active Connections", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connect_metrics_connection_count{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Active Connections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_connection_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=worker_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=worker_base + 2 - ), - ), - G.TimeSeries( - title="Authentications", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connect_metrics_successful_authentication_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', 
- legendFormat="{{pod}} (success)", - ), - G.Target( - expr='kafka_connect_connect_metrics_failed_authentication_total{namespace="$ns",app="$connect_app",pod=~"$connect_worker"}', - legendFormat="{{pod}} (failed)", + ), + G.TimeSeries( + title="Authentications", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connect_metrics_successful_authentication_rate{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}', + legendFormat="{{" + server_label + "}} (success)", + ), + G.Target( + expr="kafka_connect_connect_metrics_failed_authentication_total{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker"}', + legendFormat="{{" + server_label + "}} (failed)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=worker_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=worker_base + 2 - ), - ), -] -worker_panels = [ - G.RowPanel( - title="Connect Workers", - gridPos=G.GridPos(h=1, w=24, x=0, y=worker_base), - collapsed=True, - panels=worker_inner, - ), -] + ), + ] + worker_panels = [ + G.RowPanel( + title="Connect Workers", + gridPos=G.GridPos(h=1, w=24, x=0, y=worker_base), + collapsed=True, + panels=worker_inner, + ), + ] -tasks_base = worker_base + 1 -tasks_inner = [ - G.TimeSeries( - title="Batch Size (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connector_task_metrics_batch_size_avg{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + tasks_base = worker_base + 1 + tasks_inner = [ + G.TimeSeries( + title="Batch Size (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + 
G.Target( + expr="kafka_connect_connector_task_metrics_batch_size_avg{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=tasks_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=tasks_base - ), - ), - G.TimeSeries( - title="Batch Size (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connector_task_metrics_batch_size_max{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Batch Size (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connector_task_metrics_batch_size_max{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=tasks_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=tasks_base - ), - ), - - G.TimeSeries( - title="Offset commit success %", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Offset 
commit success %", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connector_task_metrics_offset_commit_success_percentage{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=tasks_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=tasks_base + 1 - ), - ), - G.TimeSeries( - title="Offset commit avg. latency", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Offset commit avg. 
latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=tasks_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=tasks_base + 1 - ), - ), -] -tasks_panels = [ - G.RowPanel( - title="Tasks", - gridPos=G.GridPos(h=1, w=24, x=0, y=tasks_base), - collapsed=True, - panels=tasks_inner, - ), -] + ), + ] + tasks_panels = [ + G.RowPanel( + title="Tasks", + gridPos=G.GridPos(h=1, w=24, x=0, y=tasks_base), + collapsed=True, + panels=tasks_inner, + ), + ] -task_errors_base = tasks_base + 2 -task_errors_inner = [ - G.TimeSeries( - title="Total Record Failures", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_task_error_metrics_total_record_failures{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + task_errors_base = tasks_base + 2 + task_errors_inner = [ + G.TimeSeries( + title="Total Record Failures", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_record_failures{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=task_errors_base ), - ], - legendDisplayMode="table", - 
legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=task_errors_base - ), - ), - G.TimeSeries( - title="Total Record Error", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_task_error_metrics_total_record_errors{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Total Record Error", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_record_errors{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=task_errors_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=task_errors_base - ), - ), - G.TimeSeries( - title="Total Records Skipped", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_task_error_metrics_total_records_skipped{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Total Records Skipped", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_records_skipped{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=task_errors_base ), - ], - 
legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=task_errors_base - ), - ), - G.TimeSeries( - title="Total Errors Logged", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_task_error_metrics_total_errors_logged{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Total Errors Logged", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_errors_logged{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=task_errors_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=task_errors_base + 1 - ), - ), - G.TimeSeries( - title="Total Retries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_task_error_metrics_total_retries{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Total Retries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_total_retries{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=task_errors_base + 1 ), - ], - 
legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=task_errors_base + 1 - ), - ), - G.TimeSeries( - title="Dead Letter Topic Requests", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Dead Letter Topic Requests", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_task_error_metrics_deadletterqueue_produce_requests{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=task_errors_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=task_errors_base + 1 - ), - ), -] -task_errors_panels = [ - G.RowPanel( - title="Task Errors", - gridPos=G.GridPos(h=1, w=24, x=0, y=task_errors_base), - collapsed=True, - panels=task_errors_inner, - ), -] + ), + ] + task_errors_panels = [ + G.RowPanel( + title="Task Errors", + gridPos=G.GridPos(h=1, w=24, x=0, y=task_errors_base), + collapsed=True, + panels=task_errors_inner, + ), + ] -source_base = task_errors_base + 2 -source_inner = [ - G.TimeSeries( - title="Poll Batch Avg. 
Latency", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + source_base = task_errors_base + 2 + source_inner = [ + G.TimeSeries( + title="Poll Batch Avg. Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_source_task_metrics_poll_batch_avg_time_ms{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=source_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=source_base - ), - ), - G.TimeSeries( - title="Poll Batch Max. Latency", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Poll Batch Max. 
Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_source_task_metrics_poll_batch_max_time_ms{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=source_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=source_base - ), - ), - G.TimeSeries( - title="Source Record Poll Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_source_task_metrics_source_record_poll_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Source Record Poll Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_source_task_metrics_source_record_poll_rate{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=source_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ops", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=source_base + 1 - ), - ), - G.TimeSeries( - title="Source Record Write Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_source_task_metrics_source_record_write_rate{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - 
legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Source Record Write Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_source_task_metrics_source_record_write_rate{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=source_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ops", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=source_base + 1 - ), - ), -] -source_panels = [ - G.RowPanel( - title="Source Tasks", - gridPos=G.GridPos(h=1, w=24, x=0, y=source_base), - collapsed=True, - panels=source_inner, - ), -] + ), + ] + source_panels = [ + G.RowPanel( + title="Source Tasks", + gridPos=G.GridPos(h=1, w=24, x=0, y=source_base), + collapsed=True, + panels=source_inner, + ), + ] -sink_base = source_base + 2 -sink_inner = [ - G.TimeSeries( - title="Put Batch Avg. Latency", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + sink_base = source_base + 2 + sink_inner = [ + G.TimeSeries( + title="Put Batch Avg. 
Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_sink_task_metrics_put_batch_avg_time_ms{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=sink_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=sink_base - ), - ), - G.TimeSeries( - title="Put Batch Max. Latency", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Put Batch Max. 
Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_sink_task_metrics_put_batch_max_time_ms{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=sink_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=sink_base - ), - ), - G.TimeSeries( - title="Partition Count", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_connect_sink_task_metrics_partition_count{namespace="$ns",app="$connect_app",pod=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + ), + G.TimeSeries( + title="Partition Count", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_connect_sink_task_metrics_partition_count{" + + env_label + + '="$env",' + + connect_cluster_label + + '="$connect_cluster",' + + server_label + + '=~"$connect_worker",connector=~"$connector"}', + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=sink_base + 1 ), + ), + ] + sink_panels = [ + G.RowPanel( + title="Sink Tasks", + gridPos=G.GridPos(h=1, w=24, x=0, y=sink_base), + collapsed=True, + panels=sink_inner, + ), + ] + + panels = ( + hc_panels + + system_panels + + tasks_panels + + task_errors_panels + + source_panels + + sink_panels + + worker_panels + ) + + return G.Dashboard( + title="Kafka Connect cluster - v2", + description="Overview of the Kafka Connect cluster", + tags=["confluent", "kafka-connect"], + inputs=[ + G.DataSourceInput( + 
name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=sink_base + 1 - ), - ), -] -sink_panels = [ - G.RowPanel( - title="Sink Tasks", - gridPos=G.GridPos(h=1, w=24, x=0, y=sink_base), - collapsed=True, - panels=sink_inner, - ), -] + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() -panels = hc_panels + system_panels + tasks_panels + task_errors_panels + source_panels + sink_panels + worker_panels -dashboard = G.Dashboard( - title="Kafka Connect cluster - v2", - description="Overview of the Kafka Connect cluster", - tags=["confluent", "kafka-connect"], - inputs=[ - G.DataSourceInput( - name="DS_PROMETHEUS", - label="Prometheus", - pluginId="prometheus", - pluginName="Prometheus", - ) - ], - templating=templating, - timezone="browser", - panels=panels, - refresh="30s", -).auto_panel_ids() +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +connect_cluster_label = os.environ.get( + "CONNECT_CLUSTER_LABEL", "kafka_connect_cluster_id" +) +dashboard = dashboard(env_label, server_label, connect_cluster_label) diff --git a/grafana-dashboards/kafka-topics.py b/grafana-dashboards/kafka-topics.py index 0a643ada..b4481fe5 100644 --- a/grafana-dashboards/kafka-topics.py +++ b/grafana-dashboards/kafka-topics.py @@ -1,253 +1,283 @@ +import os import grafanalib.core as G -defaultHeight = 10 -tsWidth = 12 -tableWidth = 12 -templating = G.Templating( - list=[ - G.Template( - name="ns", - label="Namespace", - dataSource="Prometheus", - query="label_values(namespace)", - ), - G.Template( - name="topic", - label="Topic", - dataSource="Prometheus", - query='label_values(kafka_log_log_size{namespace="$ns"}, topic)', - multi=True, - includeAll=True, - ), - ] -) +def 
dashboard(env_label="namespace"): + default_height = 10 + ts_width = 12 + table_width = 12 -topk = "10" + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values(" + env_label + ")", + ), + G.Template( + name="topic", + label="Topic", + dataSource="Prometheus", + query="label_values(kafka_log_log_size{" + + env_label + + '="$env"}, topic)', + multi=True, + includeAll=True, + ), + ] + ) + + topk = "10" -throughput_base = 0 -throughput_layers = 3 -throughput_panels = [ - G.RowPanel( - title="Throughput", - gridPos=G.GridPos(h=1, w=24, x=0, y=throughput_base), - ), - G.TimeSeries( - title="Messages In/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr="topk(" - + topk - + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~"$topic",namespace="$ns"}[5m])))', - legendFormat="{{topic}}", + throughput_base = 0 + throughput_layers = 3 + throughput_panels = [ + G.RowPanel( + title="Throughput", + gridPos=G.GridPos(h=1, w=24, x=0, y=throughput_base), + ), + G.TimeSeries( + title="Messages In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~"$topic",' + + env_label + + '="$env"}[5m])))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 0, y=throughput_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="cps", - stacking={"mode": "normal"}, - gridPos=G.GridPos(h=defaultHeight, w=tsWidth, x=tsWidth * 0, y=throughput_base), - ), - G.TimeSeries( - title="Log size", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr="topk(" - + topk 
- + ', sum(kafka_log_log_size{namespace="$ns",topic=~"$topic"}) by (topic))', - legendFormat="{{topic}}", + ), + G.TimeSeries( + title="Log size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum(kafka_log_log_size{" + + env_label + + '="$env",topic=~"$topic"}) by (topic))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 1, y=throughput_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - stacking={"mode": "normal"}, - gridPos=G.GridPos(h=defaultHeight, w=tsWidth, x=tsWidth * 1, y=throughput_base), - ), - G.TimeSeries( - title="Bytes In/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr="topk(" - + topk - + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~"$topic",namespace="$ns"}[5m])))', - legendFormat="{{topic}}", + ), + G.TimeSeries( + title="Bytes In/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~"$topic",' + + env_label + + '="$env"}[5m])))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 0, y=throughput_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="binBps", - stacking={"mode": "normal"}, - gridPos=G.GridPos( - h=defaultHeight, w=tsWidth, x=tsWidth * 0, y=throughput_base + 1 ), - ), - G.TimeSeries( - title="Bytes Out/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr="topk(" - + topk - + ', sum 
without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~"$topic",namespace="$ns"}[5m])))', - legendFormat="{{topic}}", + G.TimeSeries( + title="Bytes Out/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~"$topic",' + + env_label + + '="$env"}[5m])))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 1, y=throughput_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="binBps", - stacking={"mode": "normal"}, - gridPos=G.GridPos( - h=defaultHeight, w=tsWidth, x=tsWidth * 1, y=throughput_base + 1 ), - ), - G.TimeSeries( - title="Produce Requests/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr="topk(" - + topk - + ', sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace="$ns", topic=~"$topic"}[5m])) by (topic))', - legendFormat="{{topic}}", + G.TimeSeries( + title="Produce Requests/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ " + + env_label + + '="$env", topic=~"$topic"}[5m])) by (topic))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 0, y=throughput_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="reqps", - stacking={"mode": "normal"}, - gridPos=G.GridPos( - h=defaultHeight, w=tsWidth, x=tsWidth * 0, y=throughput_base + 2 ), - ), - G.TimeSeries( - title="Consumer 
Fetch Requests/Sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr="topk(" - + topk - + ', sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace="$ns", topic=~"$topic"}[5m])) by (topic))', - legendFormat="{{topic}}", + G.TimeSeries( + title="Consumer Fetch Requests/Sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ", sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ " + + env_label + + '="$env", topic=~"$topic"}[5m])) by (topic))', + legendFormat="{{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height, w=ts_width, x=ts_width * 1, y=throughput_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="reqps", - stacking={"mode": "normal"}, - gridPos=G.GridPos( - h=defaultHeight, w=tsWidth, x=tsWidth * 1, y=throughput_base + 2 ), - ), -] - + ] -offsets_txs = [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": True, - "__name__": True, - "app": True, - "confluent_platform": True, - "controller_revision_hash": True, - "job": True, - "clusterId": True, - "confluentPlatform": True, - "instance": True, - "namespace": True, - "platform_confluent_io_type": True, - "statefulset_kubernetes_io_pod_name": True, - "type": True, + offsets_txs = [ + { + "id": "organize", + "options": { + "excludeByName": { + "Time": True, + "__name__": True, + "app": True, + "confluent_platform": True, + "controller_revision_hash": True, + "job": True, + "clusterId": True, + "confluentPlatform": True, + "instance": True, + "namespace": True, + "platform_confluent_io_type": True, + "statefulset_kubernetes_io_pod_name": True, + "type": True, + }, + "indexByName": { + "pod": 1, + "topic": 2, + "partition": 3, + "Value": 4, + }, + "renameByName": {"Value": "offset"}, }, - "indexByName": { - "pod": 1, - "topic": 2, - "partition": 3, 
- "Value": 4, + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + {"destinationType": "number", "targetField": "partition"} + ], + "fields": {}, }, - "renameByName": {"Value": "offset"}, }, - }, - { - "id": "convertFieldType", - "options": { - "conversions": [{"destinationType": "number", "targetField": "partition"}], - "fields": {}, + {"id": "sortBy", "options": {"fields": {}, "sort": [{"field": "topic"}]}}, + { + "id": "sortBy", + "options": {"fields": {}, "sort": [{"field": "partition"}]}, }, - }, - {"id": "sortBy", "options": {"fields": {}, "sort": [{"field": "topic"}]}}, - { - "id": "sortBy", - "options": {"fields": {}, "sort": [{"field": "partition"}]}, - }, -] + ] -offsets_base = throughput_base + throughput_layers -offsets_inner = [ - G.Table( - title="Start Offsets", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_log_log_logstartoffset{namespace="$ns",topic=~"$topic"}', - legendFormat="{{topic}}", - format="table", - instant=True, + offsets_base = throughput_base + throughput_layers + offsets_inner = [ + G.Table( + title="Start Offsets", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_log_log_logstartoffset{" + + env_label + + '="$env",topic=~"$topic"}', + legendFormat="{{topic}}", + format="table", + instant=True, + ), + ], + filterable=True, + transformations=offsets_txs, + gridPos=G.GridPos( + h=default_height, w=table_width, x=table_width * 0, y=offsets_base ), - ], - filterable=True, - transformations=offsets_txs, - gridPos=G.GridPos( - h=defaultHeight, w=tableWidth, x=tableWidth * 0, y=offsets_base ), - ), - G.Table( - title="End Offsets", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_log_log_logendoffset{namespace="$ns",topic=~"$topic"}', - legendFormat="{{topic}}", - format="table", - instant=True, + G.Table( + title="End Offsets", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_log_log_logendoffset{" + + env_label + + 
'="$env",topic=~"$topic"}', + legendFormat="{{topic}}", + format="table", + instant=True, + ), + ], + filterable=True, + transformations=offsets_txs, + gridPos=G.GridPos( + h=default_height, w=table_width, x=table_width * 1, y=offsets_base ), - ], - filterable=True, - transformations=offsets_txs, - gridPos=G.GridPos( - h=defaultHeight, w=tableWidth, x=tableWidth * 1, y=offsets_base ), - ), -] -offsets_panels = [ - G.RowPanel( - title="Offsets", - gridPos=G.GridPos(h=1, w=24, x=0, y=offsets_base), - collapsed=True, - panels=offsets_inner, - ), -] + ] + offsets_panels = [ + G.RowPanel( + title="Offsets", + gridPos=G.GridPos(h=1, w=24, x=0, y=offsets_base), + collapsed=True, + panels=offsets_inner, + ), + ] + + panels = throughput_panels + offsets_panels + return G.Dashboard( + title="Kafka topics - v2", + description="Overview of the Kafka topics", + tags=["confluent", "kafka"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + -panels = throughput_panels + offsets_panels -dashboard = G.Dashboard( - title="Kafka topics - v2", - description="Overview of the Kafka topics", - tags=["confluent", "kafka"], - inputs=[ - G.DataSourceInput( - name="DS_PROMETHEUS", - label="Prometheus", - pluginId="prometheus", - pluginName="Prometheus", - ) - ], - templating=templating, - timezone="browser", - panels=panels, - refresh="30s", -).auto_panel_ids() +env_label = os.environ.get("ENV_LABEL", "env") +dashboard = dashboard(env_label) diff --git a/grafana-dashboards/ksqldb-cluster.py b/grafana-dashboards/ksqldb-cluster.py index ffe91977..5c98d9b2 100644 --- a/grafana-dashboards/ksqldb-cluster.py +++ b/grafana-dashboards/ksqldb-cluster.py @@ -1,588 +1,632 @@ - +import os import grafanalib.core as G -defaultHeight = 5 -statWidth = 4 -tsWidth = 8 - -templating = G.Templating( - list=[ - G.Template( 
- name="ns", - label="Namespace", - dataSource="Prometheus", - query="label_values(namespace)", - ), - G.Template( - name="ksqldb_app", - label="ksqlDB cluster", - dataSource="Prometheus", - query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns"},app)', - ), - G.Template( - name="ksqldb_cluster_id", - label="ksqlDB cluster ID", - dataSource="Prometheus", - query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns"},ksql_cluster)', - hide=True, - ), - G.Template( - name="ksqldb_server", - label="ksqlDB server", - dataSource="Prometheus", - query='label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns",app="$ksqldb_app"}, pod)', - multi=True, - includeAll=True, +def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='app'): + default_height = 5 + stat_width = 4 + ts_width = 8 + + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values("+env_label+")", + ), + G.Template( + name="ksqldb_cluster", + label="ksqlDB cluster", + dataSource="Prometheus", + query='label_values(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env"},'+ksqldb_cluster_label+')', + ), + G.Template( + name="ksqldb_cluster_id", + label="ksqlDB cluster ID", + dataSource="Prometheus", + query='label_values(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env"},ksql_cluster)', + hide=2, # true + ), + G.Template( + name="ksqldb_server", + label="ksqlDB server", + dataSource="Prometheus", + query='label_values(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster"}, '+server_label+')', + multi=True, + includeAll=True, + ), + ] + ) + + + hc_base = 0 + hc_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=hc_base), + ), + G.Stat( + title="ksqlDB: Online Servers", + dataSource="${DS_PROMETHEUS}", + 
targets=[ + G.Target( + expr='count(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=hc_base), + ), + G.Stat( + title="ksqlDB: Sum of Active Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=hc_base), + ), + G.Stat( + title="ksqlDB: Sum of Running Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_running_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=hc_base), + ), + G.Stat( + title="ksqlDB: Sum of Rebalancing Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(ksql_ksql_engine_query_stats_rebalancing_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="yellow"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=hc_base), + ), + G.Stat( + title="Connect: Sum of Failed Queries", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(ksql_ksql_engine_query_stats_error_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + ), + ], 
+ reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=hc_base), + ), + G.TimeSeries( + title="Cluster Liveness", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='ksql_ksql_engine_query_stats_liveness_indicator{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster"}', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=0, y=hc_base + 1), + ), + G.TimeSeries( + title="Messages consumed/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='ksql_ksql_engine_query_stats_messages_consumed_per_sec{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster"}', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=hc_base + 1), + ), + G.TimeSeries( + title="Messages produced/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='ksql_ksql_engine_query_stats_messages_produced_per_sec{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster"}', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=hc_base + 1), ), ] -) - - -hc_base = 0 -hc_panels = [ - G.RowPanel( - title="Overview", - gridPos=G.GridPos(h=1, w=24, x=0, y=hc_base), - ), - G.Stat( - title="ksqlDB: Online Servers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, 
color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 0, y=hc_base), - ), - G.Stat( - title="ksqlDB: Sum of Active Queries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(ksql_ksql_engine_query_stats_num_active_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 1, y=hc_base), - ), - G.Stat( - title="ksqlDB: Sum of Running Queries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(ksql_ksql_engine_query_stats_running_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="green"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 2, y=hc_base), - ), - G.Stat( - title="ksqlDB: Sum of Rebalancing Queries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="yellow"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 3, y=hc_base), - ), - G.Stat( - title="Connect: Sum of Failed Queries", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='avg(ksql_ksql_engine_query_stats_error_queries{namespace="$ns", app="$ksqldb_app"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - G.Threshold(index=1, value=1.0, color="red"), - ], - gridPos=G.GridPos(h=defaultHeight, w=statWidth, x=statWidth * 4, y=hc_base), - ), - - G.TimeSeries( - title="Cluster Liveness", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - 
expr='ksql_ksql_engine_query_stats_liveness_indicator{namespace="$ns",app="$ksqldb_app"}', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=0, y=hc_base + 1), - ), - G.TimeSeries( - title="Messages consumed/sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace="$ns",app="$ksqldb_app"}', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="cps", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=hc_base + 1), - ), - G.TimeSeries( - title="Messages produced/sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace="$ns",app="$ksqldb_app"}', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="cps", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=hc_base + 1), - ), -] - -system_base = hc_base + 2 -system_panels = [ - G.RowPanel( - title="System", - gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), - ), - G.TimeSeries( - title="CPU usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='irate(process_cpu_seconds_total{namespace="$ns",app="$ksqldb_app",type="ksqldb"}[5m])', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=system_base), - ), - G.TimeSeries( - title="Memory usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(area)(jvm_memory_bytes_used{namespace="$ns",app="$ksqldb_app",type="ksqldb"})', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - gridPos=G.GridPos(h=defaultHeight * 2, 
w=tsWidth, x=tsWidth * 1, y=system_base), - ), - G.TimeSeries( - title="GC collection", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",app="$ksqldb_app",type="ksqldb"}[5m]))', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=system_base), - ), -] - -queries_base = system_base + 1 -queries_inner = [ - G.TimeSeries( - title="Poll Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_thread_metrics_poll_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + + system_base = hc_base + 2 + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), + ), + G.TimeSeries( + title="CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='irate(process_cpu_seconds_total{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",'+server_label+'=~"$ksqldb_server"}[5m])', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=system_base), + ), + G.TimeSeries( + title="Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(area)(jvm_memory_bytes_used{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",'+server_label+'=~"$ksqldb_server"})', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=system_base), + ), + G.TimeSeries( + title="GC collection", + dataSource="${DS_PROMETHEUS}", + 
targets=[ + G.Target( + expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",'+server_label+'=~"$ksqldb_server"}[5m]))', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=system_base), + ), + ] + + queries_base = system_base + 1 + queries_inner = [ + G.TimeSeries( + title="Poll Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_poll_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=queries_base), - ), - G.TimeSeries( - title="Poll Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_thread_metrics_poll_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Poll Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_poll_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, 
y=queries_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=queries_base), - ), - - G.TimeSeries( - title="Process Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_thread_metrics_process_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Process Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_process_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=queries_base + 1), - ), - G.TimeSeries( - title="Process Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_thread_metrics_process_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Process Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_process_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + 
h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=queries_base + 1), - ), - - G.TimeSeries( - title="Commit Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_thread_metrics_commit_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Commit Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_commit_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=queries_base + 2), - ), - G.TimeSeries( - title="Commit Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_thread_metrics_commit_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Commit Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_commit_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], 
+ unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=queries_base + 2), - ), - - G.TimeSeries( - title="Punctuate Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Punctuate Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_punctuate_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + 3 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=queries_base + 3), - ), - G.TimeSeries( - title="Punctuate Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Punctuate Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_thread_metrics_punctuate_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + 
legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + 3 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=queries_base + 3), - ), -] -queries_panels = [ - G.RowPanel( - title="Queries Performance", - gridPos=G.GridPos(h=1, w=24, x=0, y=queries_base), - collapsed=True, - panels=queries_inner, - ), -] - -stores_base = queries_base + 4 -stores_inner = [ - G.TimeSeries( - title="Put Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_put_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + ] + queries_panels = [ + G.RowPanel( + title="Queries Performance", + gridPos=G.GridPos(h=1, w=24, x=0, y=queries_base), + collapsed=True, + panels=queries_inner, + ), + ] + + stores_base = queries_base + 4 + stores_inner = [ + G.TimeSeries( + title="Put Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 0 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ops", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 0), - ), - G.TimeSeries( - title="Put Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - 
expr='kafka_streams_stream_state_metrics_put_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Put Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 0 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 0), - ), - G.TimeSeries( - title="Put Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_put_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Put Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 0 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 0), - ), - - G.TimeSeries( - title="Put if absent Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - 
G.Target( - expr='kafka_streams_stream_state_metrics_put_if_absent_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Put if absent Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_if_absent_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ops", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 1), - ), - G.TimeSeries( - title="Put if absent Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Put if absent Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_if_absent_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 1), - ), - G.TimeSeries( - title="Put if absent 
Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Put if absent Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_put_if_absent_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 1), - ), - - G.TimeSeries( - title="Fetch Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_fetch_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Fetch Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_fetch_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ops", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 2), - ), - G.TimeSeries( - 
title="Fetch Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_fetch_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Fetch Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_fetch_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 2), - ), - G.TimeSeries( - title="Fetch Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_fetch_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Fetch Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_fetch_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 2 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 2), - ), 
- - G.TimeSeries( - title="Delete Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_delete_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Delete Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_delete_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 3 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ops", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 3), - ), - G.TimeSeries( - title="Delete Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_delete_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Delete Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_delete_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 3 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 3), - ), - 
G.TimeSeries( - title="Delete Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_delete_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Delete Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_delete_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 3 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=stores_base + 3), - ), - - G.TimeSeries( - title="Restore Rate", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_restore_rate{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Restore Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_restore_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=stores_base + 4 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ops", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=stores_base + 4), - ), - 
G.TimeSeries( - title="Restore Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_restore_latency_avg{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Restore Latency (Avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_restore_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=stores_base + 4 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=stores_base + 4), - ), - G.TimeSeries( - title="Restore Latency (Max.)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_streams_stream_state_metrics_restore_latency_max{namespace="$ns",app="$ksqldb_app",pod=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', - legendFormat="{{thread_id}}", + ), + G.TimeSeries( + title="Restore Latency (Max.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='kafka_streams_stream_state_metrics_restore_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + legendFormat="{{thread_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=stores_base + 4 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, 
x=tsWidth * 2, y=stores_base + 4), - ), -] -stores_panels = [ - G.RowPanel( - title="State Stores", - gridPos=G.GridPos(h=1, w=24, x=0, y=stores_base), - collapsed=True, - panels=stores_inner, - ), -] - -panels = hc_panels + system_panels + queries_panels + stores_panels + ), + ] + stores_panels = [ + G.RowPanel( + title="State Stores", + gridPos=G.GridPos(h=1, w=24, x=0, y=stores_base), + collapsed=True, + panels=stores_inner, + ), + ] + + panels = hc_panels + system_panels + queries_panels + stores_panels + + return G.Dashboard( + title="ksqlDB cluster - v2", + description="Overview of ksqlDB clusters.", + tags=[ + "confluent", + "ksqldb", + ], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() -dashboard = G.Dashboard( - title="ksqlDB cluster - v2", - description="Overview of ksqlDB clusters.", - tags=[ - "confluent", - "ksqldb", - ], - inputs=[ - G.DataSourceInput( - name="DS_PROMETHEUS", - label="Prometheus", - pluginId="prometheus", - pluginName="Prometheus", - ) - ], - templating=templating, - timezone="browser", - panels=panels, - refresh="30s", -).auto_panel_ids() +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +ksqldb_cluster_label = os.environ.get("KSQLDB_CLUSTER_LABEL", "ksqldb_cluster_id") +dashboard = dashboard(env_label, server_label, ksqldb_cluster_label) diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py index ef8c3bdd..a3497fa2 100644 --- a/grafana-dashboards/schema-registry-cluster.py +++ b/grafana-dashboards/schema-registry-cluster.py @@ -1,178 +1,196 @@ +import os import grafanalib.core as G -defaultHeight = 5 -statWidth = 4 -tsWidth = 8 - -templating = G.Templating( - list=[ - G.Template( - name="ns", - label="Namespace", - 
dataSource="Prometheus", - query="label_values(namespace)", - ), - ] -) - -healthcheck_base = 0 -healthcheck_panels = [ - G.RowPanel( - title="Health-check", - gridPos=G.GridPos(h=1, w=24, x=0, y=0), - ), - G.Stat( - title="SR: Online instances", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(kafka_schema_registry_registered_count{namespace="$ns"})', +def dashboard(env_label='namespace',server_label='pod'): + default_height = 5 + stat_width = 4 + ts_width = 8 + + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values("+env_label+")", ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="red"), - G.Threshold(index=1, value=1.0, color="yellow"), - G.Threshold(index=2, value=2.0, color="green"), - ], - gridPos=G.GridPos( - h=defaultHeight, w=statWidth, x=statWidth * 0, y=healthcheck_base - ), - ), - G.Stat( - title="SR: Sum of Registered Schemas", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='avg(kafka_schema_registry_registered_count{namespace="$ns"})', - instant=True, + G.Template( + name="sr_server", + label="Server", + dataSource="Prometheus", + query="label_values(kafka_schema_registry_registered_count{" + + env_label + + '="$env"}, ' + + server_label + + ")", + multi=True, + includeAll=True, ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos( - h=defaultHeight, w=statWidth, x=statWidth * 1, y=healthcheck_base + ] + ) + + healthcheck_base = 0 + healthcheck_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), ), - ), - G.Stat( - title="SR: Sum of Created Schemas by Type", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='avg(kafka_schema_registry_schemas_created{namespace="$ns"}) by (schema_type)', - legendFormat="{{schema_type}}", + G.Stat( + title="SR: Online instances", + 
dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='count(kafka_schema_registry_registered_count{' + env_label + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=2.0, color="green"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 0, y=healthcheck_base ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos( - h=defaultHeight, w=statWidth, x=statWidth * 2, y=healthcheck_base ), - ), - G.Stat( - title="SR: Sum of Deleted Schemas by Type", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(kafka_schema_registry_schemas_deleted{namespace="$ns"}) by (schema_type)', - legendFormat="{{schema_type}}", + G.Stat( + title="SR: Sum of Registered Schemas", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(kafka_schema_registry_registered_count{' + env_label + '="$env"})', + instant=True, + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 1, y=healthcheck_base ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos( - h=defaultHeight, w=statWidth, x=statWidth * 3, y=healthcheck_base ), - ), - G.Stat( - title="SR: Sum of Active Connections", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr="sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", + G.Stat( + title="SR: Sum of Created Schemas by Type", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='avg(kafka_schema_registry_schemas_created{' + env_label + '="$env"}) by (schema_type)', + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + 
gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 2, y=healthcheck_base ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos( - h=defaultHeight, w=statWidth, x=statWidth * 4, y=healthcheck_base ), - ), -] - -system_panels = [ - G.RowPanel( - title="System", - gridPos=G.GridPos(h=1, w=24, x=0, y=1), - ), - G.TimeSeries( - title="CPU usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='irate(process_cpu_seconds_total{namespace="$ns",type="schemaregistry"}[5m])', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 0, y=1), - ), - G.TimeSeries( - title="Memory usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(area)(jvm_memory_bytes_used{namespace="$ns",type="schemaregistry"})', - legendFormat="{{pod}}", + G.Stat( + title="SR: Sum of Deleted Schemas by Type", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum(kafka_schema_registry_schemas_deleted{' + env_label + '="$env"}) by (schema_type)', + legendFormat="{{schema_type}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 3, y=healthcheck_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 1, y=1), - ), - G.TimeSeries( - title="GC collection", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",type="schemaregistry"}[5m]))', - legendFormat="{{pod}}", + ), + G.Stat( + title="SR: Sum of Active Connections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr='sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count{' + env_label + '="$env"})', # scoped to $env like the other overview stats + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 4, y=healthcheck_base ), + ), + ] + + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.TimeSeries( + title="CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='irate(process_cpu_seconds_total{' + env_label + '="$env",'+server_label+'=~"$sr_server"}[5m])', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=1), + ), + G.TimeSeries( + title="Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(area)(jvm_memory_bytes_used{' + env_label + '="$env",'+server_label+'=~"$sr_server"})', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=1), + ), + G.TimeSeries( + title="GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{' + env_label + '="$env",'+server_label+'=~"$sr_server"}[5m]))', + legendFormat="{{"+server_label+"}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=1), + ), + ] + + panels = healthcheck_panels + system_panels + + return G.Dashboard( + title="Schema Registry cluster - v2", + description="Overview of the Schema Registry cluster", + tags=["confluent", "schema-registry"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", +
pluginName="Prometheus", + ) ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=defaultHeight * 2, w=tsWidth, x=tsWidth * 2, y=1), - ), -] - -panels = healthcheck_panels + system_panels + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() -dashboard = G.Dashboard( - title="Schema Registry cluster - v2", - description="Overview of the Schema Registry cluster", - tags=["confluent", "schema-registry"], - inputs=[ - G.DataSourceInput( - name="DS_PROMETHEUS", - label="Prometheus", - pluginId="prometheus", - pluginName="Prometheus", - ) - ], - templating=templating, - timezone="browser", - panels=panels, - refresh="30s", -).auto_panel_ids() +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +dashboard = dashboard(env_label, server_label) diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py index 2e60aad1..0e1b34d7 100644 --- a/grafana-dashboards/zookeeper-cluster.py +++ b/grafana-dashboards/zookeeper-cluster.py @@ -1,311 +1,376 @@ +import os import grafanalib.core as G -hcHeight = 5 -statWidth = 4 -tsWidth = 8 -templating = G.Templating( - list=[ - G.Template( - name="ns", - label="Namespace", - dataSource="Prometheus", - query="label_values(namespace)", - ), - G.Template( - name="quantile", - label="Quantile", - dataSource="Prometheus", - query='label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace="$ns"}, quantile)', - ), - ] -) +def dashboard(env_label="namespace", server_label="pod"): + default_height = 5 + stat_width = 4 + ts_width = 8 -healthcheck_panels = [ - G.RowPanel( - title="Health-check", - gridPos=G.GridPos(h=1, w=24, x=0, y=0), - ), - G.Stat( - title="ZK: Quorum Size", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='count(zookeeper_status_quorumsize{namespace="$ns"})', + templating = G.Templating( + 
list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values(" + env_label + ")", ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="red"), - G.Threshold(index=1, value=2.0, color="yellow"), - G.Threshold(index=2, value=3.0, color="green"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 0, y=0), - ), - G.Stat( - title="ZK: Avg. number of ZNodes", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='avg(zookeeper_inmemorydatatree_nodecount{namespace="$ns"})', + G.Template( + name="zk_server", + label="Server", + dataSource="Prometheus", + query="label_values(zookeeper_outstandingrequests{" + + env_label + + '="$env"}, ' + + server_label + + ")", + multi=True, + includeAll=True, ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 1, y=0), - ), - G.Stat( - title="ZK: Sum of number of Alive Connections", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(zookeeper_numaliveconnections{namespace="$ns"})', + G.Template( + name="quantile", + label="Quantile", + dataSource="Prometheus", + query="label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{" + + env_label + + '="$env"}, quantile)', ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 2, y=0), - ), - G.Stat( - title="ZK: Sum of watchers", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum(zookeeper_inmemorydatatree_watchcount{namespace="$ns"})', - ), - ], - reduceCalc="last", - thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), - ], - gridPos=G.GridPos(h=hcHeight, w=statWidth, x=statWidth * 3, y=0), - ), - G.TimeSeries( - title="ZK: Outstanding Requests", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - 
expr='zookeeper_outstandingrequests{namespace="$ns"}', - legendFormat="{{pod}} ({{server_id}}:{{member_type}})", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "last"], - legendPlacement="right", - gridPos=G.GridPos(h=hcHeight, w=tsWidth, x=statWidth * 4, y=0), - ), -] + ] + ) -system_panels = [ - G.RowPanel( - title="System", - gridPos=G.GridPos(h=1, w=24, x=0, y=1), - ), - G.TimeSeries( - title="CPU usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='irate(process_cpu_seconds_total{namespace="$ns",type="zookeeper"}[5m])', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=1), - ), - G.TimeSeries( - title="Memory usage", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(area)(jvm_memory_bytes_used{namespace="$ns",type="zookeeper"})', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=1), - ), - G.TimeSeries( - title="GC collection", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace="$ns",type="zookeeper"}[5m]))', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="percentunit", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=1), - ), -] + healthcheck_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="ZK: Quorum Size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(zookeeper_status_quorumsize{" + env_label + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="red"), + G.Threshold(index=1, value=2.0, color="yellow"), + G.Threshold(index=2, value=3.0, 
color="green"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), + ), + G.Stat( + title="ZK: Avg. number of ZNodes", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="avg(zookeeper_inmemorydatatree_nodecount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="ZK: Sum of number of Alive Connections", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(zookeeper_numaliveconnections{" + env_label + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), + ), + G.Stat( + title="ZK: Sum of watchers", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum(zookeeper_inmemorydatatree_watchcount{" + + env_label + + '="$env"})', + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), + ), + G.TimeSeries( + title="ZK: Outstanding Requests", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="zookeeper_outstandingrequests{" + env_label + '="$env"}', + legendFormat="{{" + + server_label + + "}} ({{server_id}}:{{member_type}})", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "last"], + legendPlacement="right", + gridPos=G.GridPos(h=default_height, w=ts_width, x=stat_width * 4, y=0), + ), + ] -# TODO: validate if latency metrics make sense. -# Values are high-watermark of the metric and multiplied by tick-time to represent milliseconds. 
-latency_inner = [ - G.TimeSeries( - title="ZK: Request Latency (Minimum)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='zookeeper_minrequestlatency{namespace="$ns"} * zookeeper_ticktime', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=2), - ), - G.TimeSeries( - title="ZK: Request Latency (Average)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='zookeeper_avgrequestlatency{namespace="$ns"} * zookeeper_ticktime', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=2), - ), - G.TimeSeries( - title="ZK: Request Latency (Maximum)", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='zookeeper_maxrequestlatency{namespace="$ns"} * zookeeper_ticktime', - legendFormat="{{pod}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=2), - ), -] -latency_panels = [ - G.RowPanel( - title="Server Latency", - gridPos=G.GridPos(h=1, w=24, x=0, y=2), - collapsed=True, - panels=latency_inner, - ), -] + system_panels = [ + G.RowPanel( + title="System", + gridPos=G.GridPos(h=1, w=24, x=0, y=1), + ), + G.TimeSeries( + title="CPU usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="irate(process_cpu_seconds_total{" + + env_label + + '="$env",' + + server_label + + '=~"$zk_server"}[5m])', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=1), + ), + G.TimeSeries( + title="Memory usage", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(area)(jvm_memory_bytes_used{" + + 
env_label + + '="$env",' + + server_label + + '=~"$zk_server"})', # regex match: $zk_server is a multi-value variable + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=1), + ), + G.TimeSeries( + title="GC collection", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + env_label + + '="$env",' + + server_label + + '=~"$zk_server"}[5m]))', # regex match: $zk_server is a multi-value variable + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=1), + ), + ] + + # TODO: validate if latency metrics make sense. + # Values are high-watermark of the metric and multiplied by tick-time to represent milliseconds. + latency_inner = [ + G.TimeSeries( + title="ZK: Request Latency (Minimum)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="zookeeper_minrequestlatency{" + + env_label + + '="$env"} * zookeeper_ticktime', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=2), + ), + G.TimeSeries( + title="ZK: Request Latency (Average)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="zookeeper_avgrequestlatency{" + + env_label + + '="$env"} * zookeeper_ticktime', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=2), + ), + G.TimeSeries( + title="ZK: Request Latency (Maximum)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="zookeeper_maxrequestlatency{" + + env_label + + '="$env"} * zookeeper_ticktime', + legendFormat="{{" + server_label + "}}", +
), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=2), + ), + ] + latency_panels = [ + G.RowPanel( + title="Server Latency", + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + collapsed=True, + panels=latency_inner, + ), + ] -kafka_base = 2 + 1 -kafka_inner = [ - G.TimeSeries( - title="Kafka: Request Latency", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace="$ns",quantile=~"$quantile"}', - legendFormat="{{pod}}", + kafka_base = 2 + 1 + kafka_inner = [ + G.TimeSeries( + title="Kafka: Request Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{" + + env_label + + '="$env",quantile=~"$quantile"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=kafka_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, y=kafka_base), - ), - G.TimeSeries( - title="Kafka: Sync Connections/sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace="$ns"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Kafka: Sync Connections/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{" + + env_label + + '="$env"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=kafka_base ), - ], - 
legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - stacking={"mode": "normal"}, - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=kafka_base), - ), - G.TimeSeries( - title="Kafka: Expired Connections/sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace="$ns"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Kafka: Expired Connections/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_server_sessionexpirelistener_zookeeperexpirespersec{" + + env_label + + '="$env"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=kafka_base ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - stacking={"mode": "normal"}, - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 2, y=kafka_base), - ), - G.TimeSeries( - title="Kafka: Disconnected Connections/sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace="$ns"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Kafka: Disconnected Connections/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{" + + env_label + + '="$env"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=kafka_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - stacking={"mode": "normal"}, - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 0, 
y=kafka_base + 1), - ), - G.TimeSeries( - title="Kafka: Auth Failures on Connections/sec", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr='kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace="$ns"}', - legendFormat="{{pod}}", + ), + G.TimeSeries( + title="Kafka: Auth Failures on Connections/sec", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{" + + env_label + + '="$env"}', + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + stacking={"mode": "normal"}, + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=kafka_base + 1 ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - stacking={"mode": "normal"}, - gridPos=G.GridPos(h=hcHeight * 2, w=tsWidth, x=tsWidth * 1, y=kafka_base + 1), - ), -] -kafka_panels = [ - G.RowPanel( - title="Client Latency (Kafka)", - gridPos=G.GridPos(h=1, w=24, x=0, y=kafka_base), - collapsed=True, - panels=kafka_inner, - ), -] + ), + ] + kafka_panels = [ + G.RowPanel( + title="Client Latency (Kafka)", + gridPos=G.GridPos(h=1, w=24, x=0, y=kafka_base), + collapsed=True, + panels=kafka_inner, + ), + ] + + panels = healthcheck_panels + system_panels + latency_panels + kafka_panels -panels = healthcheck_panels + system_panels + latency_panels + kafka_panels + return G.Dashboard( + title="Zookeeper cluster - v2", + description="Overview of the Zookeeper cluster", + tags=["confluent", "kafka", "zookeeper"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() -dashboard = G.Dashboard( - title="Zookeeper cluster - v2", - description="Overview of the Zookeeper cluster", - tags=["confluent", "kafka", 
"zookeeper"], - inputs=[ - G.DataSourceInput( - name="DS_PROMETHEUS", - label="Prometheus", - pluginId="prometheus", - pluginName="Prometheus", - ) - ], - templating=templating, - timezone="browser", - panels=panels, - refresh="30s", -).auto_panel_ids() +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +dashboard = dashboard(env_label, server_label) From 194aa2de3b82f4a7395748f47fbab7a639d23464 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Mon, 11 Jul 2022 17:02:34 +0100 Subject: [PATCH 20/28] feat: kafka consumer and producer dashboards --- grafana-dashboards/Makefile | 2 + grafana-dashboards/ksqldb-cluster.py | 349 ++++++++++++++---- grafana-dashboards/schema-registry-cluster.py | 56 ++- grafana-dashboards/zookeeper-cluster.py | 1 + 4 files changed, 327 insertions(+), 81 deletions(-) diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index e37f2f44..9ac68244 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -20,6 +20,8 @@ dashboards: @generate-dashboard schema-registry-cluster.py -o $(OUTPUT_DIR)/schema-registry-cluster.json @generate-dashboard kafka-connect-cluster.py -o $(OUTPUT_DIR)/kafka-connect-cluster.json @generate-dashboard ksqldb-cluster.py -o $(OUTPUT_DIR)/ksqldb-cluster.json + @generate-dashboard kafka-producer.py -o $(OUTPUT_DIR)/kafka-producer.json + @generate-dashboard kafka-consumer.py -o $(OUTPUT_DIR)/kafka-consumer.json cfk: OUTPUT_DIR=cfk cfk: export ENV_LABEL=namespace diff --git a/grafana-dashboards/ksqldb-cluster.py b/grafana-dashboards/ksqldb-cluster.py index 5c98d9b2..01caa9e6 100644 --- a/grafana-dashboards/ksqldb-cluster.py +++ b/grafana-dashboards/ksqldb-cluster.py @@ -1,44 +1,56 @@ import os import grafanalib.core as G -def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='app'): + +def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="app"): default_height = 5 
stat_width = 4 ts_width = 8 - + templating = G.Templating( list=[ G.Template( name="env", label="Environment", dataSource="Prometheus", - query="label_values("+env_label+")", + query="label_values(" + env_label + ")", ), G.Template( name="ksqldb_cluster", label="ksqlDB cluster", dataSource="Prometheus", - query='label_values(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env"},'+ksqldb_cluster_label+')', + query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" + + env_label + + '="$env"},' + + ksqldb_cluster_label + + ")", ), G.Template( name="ksqldb_cluster_id", label="ksqlDB cluster ID", dataSource="Prometheus", - query='label_values(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env"},ksql_cluster)', - hide=2, # true + query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" + + env_label + + '="$env"},ksql_cluster)', + hide=2, # true ), G.Template( name="ksqldb_server", label="ksqlDB server", dataSource="Prometheus", - query='label_values(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster"}, '+server_label+')', + query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster"}, ' + + server_label + + ")", multi=True, includeAll=True, ), ] ) - - + hc_base = 0 hc_panels = [ G.RowPanel( @@ -50,35 +62,51 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='count(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + expr="count(ksql_ksql_engine_query_stats_num_active_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', ), ], reduceCalc="last", thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=default_height, 
w=stat_width, x=stat_width * 0, y=hc_base), + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 0, y=hc_base + ), ), G.Stat( title="ksqlDB: Sum of Active Queries", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum(ksql_ksql_engine_query_stats_num_active_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + expr="sum(ksql_ksql_engine_query_stats_num_active_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', ), ], reduceCalc="last", thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=hc_base), + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 1, y=hc_base + ), ), G.Stat( title="ksqlDB: Sum of Running Queries", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum(ksql_ksql_engine_query_stats_running_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + expr="sum(ksql_ksql_engine_query_stats_running_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', ), ], reduceCalc="last", @@ -86,14 +114,20 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a G.Threshold(index=0, value=0.0, color="blue"), G.Threshold(index=1, value=1.0, color="green"), ], - gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=hc_base), + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 2, y=hc_base + ), ), G.Stat( title="ksqlDB: Sum of Rebalancing Queries", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum(ksql_ksql_engine_query_stats_rebalancing_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + expr="sum(ksql_ksql_engine_query_stats_rebalancing_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', ), ], reduceCalc="last", @@ -101,14 +135,20 @@ def 
dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a G.Threshold(index=0, value=0.0, color="blue"), G.Threshold(index=1, value=1.0, color="yellow"), ], - gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=hc_base), + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 3, y=hc_base + ), ), G.Stat( title="Connect: Sum of Failed Queries", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='avg(ksql_ksql_engine_query_stats_error_queries{' + env_label + '="$env", ' + ksqldb_cluster_label + '="$ksqldb_cluster"})', + expr="avg(ksql_ksql_engine_query_stats_error_queries{" + + env_label + + '="$env", ' + + ksqldb_cluster_label + + '="$ksqldb_cluster"})', ), ], reduceCalc="last", @@ -116,15 +156,21 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a G.Threshold(index=0, value=0.0, color="blue"), G.Threshold(index=1, value=1.0, color="red"), ], - gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=hc_base), + gridPos=G.GridPos( + h=default_height, w=stat_width, x=stat_width * 4, y=hc_base + ), ), G.TimeSeries( title="Cluster Liveness", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='ksql_ksql_engine_query_stats_liveness_indicator{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster"}', - legendFormat="{{"+server_label+"}}", + expr="ksql_ksql_engine_query_stats_liveness_indicator{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster"}', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", @@ -136,31 +182,43 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='ksql_ksql_engine_query_stats_messages_consumed_per_sec{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster"}', - legendFormat="{{"+server_label+"}}", + expr="ksql_ksql_engine_query_stats_messages_consumed_per_sec{" + + 
env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster"}', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="cps", - gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=hc_base + 1), + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=hc_base + 1 + ), ), G.TimeSeries( title="Messages produced/sec", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='ksql_ksql_engine_query_stats_messages_produced_per_sec{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster"}', - legendFormat="{{"+server_label+"}}", + expr="ksql_ksql_engine_query_stats_messages_produced_per_sec{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster"}', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="cps", - gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=hc_base + 1), + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=hc_base + 1 + ), ), ] - + system_base = hc_base + 2 system_panels = [ G.RowPanel( @@ -172,45 +230,69 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='irate(process_cpu_seconds_total{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",'+server_label+'=~"$ksqldb_server"}[5m])', - legendFormat="{{"+server_label+"}}", + expr="irate(process_cpu_seconds_total{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server"}[5m])', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percentunit", - gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=system_base), + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=system_base + ), 
), G.TimeSeries( title="Memory usage", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum without(area)(jvm_memory_bytes_used{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",'+server_label+'=~"$ksqldb_server"})', - legendFormat="{{"+server_label+"}}", + expr="sum without(area)(jvm_memory_bytes_used{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server"})', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="bytes", - gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=system_base), + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=system_base + ), ), G.TimeSeries( title="GC collection", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",'+server_label+'=~"$ksqldb_server"}[5m]))', - legendFormat="{{"+server_label+"}}", + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server"}[5m]))', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percentunit", - gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=system_base), + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=system_base + ), ), ] - + queries_base = system_base + 1 queries_inner = [ G.TimeSeries( @@ -218,7 +300,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_thread_metrics_poll_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + 
'=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_thread_metrics_poll_latency_avg{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -234,7 +322,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_thread_metrics_poll_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_thread_metrics_poll_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -250,7 +344,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_thread_metrics_process_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_thread_metrics_process_latency_avg{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -266,7 +366,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_thread_metrics_process_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + 
expr="kafka_streams_stream_thread_metrics_process_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -282,7 +388,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_thread_metrics_commit_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_thread_metrics_commit_latency_avg{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -298,7 +410,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_thread_metrics_commit_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_thread_metrics_commit_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -314,7 +432,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_thread_metrics_punctuate_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_thread_metrics_punctuate_latency_avg{" + + env_label + + '="$env",' + + 
ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -330,7 +454,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_thread_metrics_punctuate_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_thread_metrics_punctuate_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -350,7 +480,7 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a panels=queries_inner, ), ] - + stores_base = queries_base + 4 stores_inner = [ G.TimeSeries( @@ -358,7 +488,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_put_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_put_rate{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -374,7 +510,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_put_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + 
expr="kafka_streams_stream_state_metrics_put_latency_avg{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -390,7 +532,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_put_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_put_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -406,7 +554,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_put_if_absent_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_put_if_absent_rate{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -422,7 +576,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_put_if_absent_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_put_if_absent_latency_avg{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + 
'="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -438,7 +598,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_put_if_absent_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_put_if_absent_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -454,7 +620,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_fetch_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_fetch_rate{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -470,7 +642,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_fetch_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_fetch_latency_avg{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', 
legendFormat="{{thread_id}}", ), ], @@ -486,7 +664,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_fetch_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_fetch_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -502,7 +686,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_delete_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_delete_rate{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -518,7 +708,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_delete_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_delete_latency_avg{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -534,7 +730,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a 
dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_delete_latency_max{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_delete_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -550,7 +752,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_restore_rate{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_restore_rate{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -566,7 +774,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_restore_latency_avg{' + env_label + '="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_restore_latency_avg{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -582,7 +796,13 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='kafka_streams_stream_state_metrics_restore_latency_max{' + env_label + 
'="$env",' + ksqldb_cluster_label + '="$ksqldb_cluster",' + server_label + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + expr="kafka_streams_stream_state_metrics_restore_latency_max{" + + env_label + + '="$env",' + + ksqldb_cluster_label + + '="$ksqldb_cluster",' + + server_label + + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', legendFormat="{{thread_id}}", ), ], @@ -602,9 +822,9 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a panels=stores_inner, ), ] - + panels = hc_panels + system_panels + queries_panels + stores_panels - + return G.Dashboard( title="ksqlDB cluster - v2", description="Overview of ksqlDB clusters.", @@ -626,6 +846,7 @@ def dashboard(env_label='namespace', server_label='pod', ksqldb_cluster_label='a refresh="30s", ).auto_panel_ids() + env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") ksqldb_cluster_label = os.environ.get("KSQLDB_CLUSTER_LABEL", "ksqldb_cluster_id") diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py index a3497fa2..226363b4 100644 --- a/grafana-dashboards/schema-registry-cluster.py +++ b/grafana-dashboards/schema-registry-cluster.py @@ -1,18 +1,19 @@ import os import grafanalib.core as G -def dashboard(env_label='namespace',server_label='pod'): + +def dashboard(env_label="namespace", server_label="pod"): default_height = 5 stat_width = 4 ts_width = 8 - + templating = G.Templating( list=[ G.Template( name="env", label="Environment", dataSource="Prometheus", - query="label_values("+env_label+")", + query="label_values(" + env_label + ")", ), G.Template( name="sr_server", @@ -28,7 +29,7 @@ def dashboard(env_label='namespace',server_label='pod'): ), ] ) - + healthcheck_base = 0 healthcheck_panels = [ G.RowPanel( @@ -40,7 +41,9 @@ def dashboard(env_label='namespace',server_label='pod'): dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - 
expr='count(kafka_schema_registry_registered_count{' + env_label + '="$env"})', + expr="count(kafka_schema_registry_registered_count{" + + env_label + + '="$env"})', ), ], reduceCalc="last", @@ -58,7 +61,9 @@ def dashboard(env_label='namespace',server_label='pod'): dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='avg(kafka_schema_registry_registered_count{' + env_label + '="$env"})', + expr="avg(kafka_schema_registry_registered_count{" + + env_label + + '="$env"})', instant=True, ), ], @@ -75,7 +80,9 @@ def dashboard(env_label='namespace',server_label='pod'): dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='avg(kafka_schema_registry_schemas_created{' + env_label + '="$env"}) by (schema_type)', + expr="avg(kafka_schema_registry_schemas_created{" + + env_label + + '="$env"}) by (schema_type)', legendFormat="{{schema_type}}", ), ], @@ -92,7 +99,9 @@ def dashboard(env_label='namespace',server_label='pod'): dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum(kafka_schema_registry_schemas_deleted{' + env_label + '="$env"}) by (schema_type)', + expr="sum(kafka_schema_registry_schemas_deleted{" + + env_label + + '="$env"}) by (schema_type)', legendFormat="{{schema_type}}", ), ], @@ -121,7 +130,7 @@ def dashboard(env_label='namespace',server_label='pod'): ), ), ] - + system_panels = [ G.RowPanel( title="System", @@ -132,8 +141,12 @@ def dashboard(env_label='namespace',server_label='pod'): dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='irate(process_cpu_seconds_total{' + env_label + '="$env",'+server_label+'=~"$sr_server"}[5m])', - legendFormat="{{"+server_label+"}}", + expr="irate(process_cpu_seconds_total{" + + env_label + + '="$env",' + + server_label + + '=~"$sr_server"}[5m])', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", @@ -146,8 +159,12 @@ def dashboard(env_label='namespace',server_label='pod'): dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum without(area)(jvm_memory_bytes_used{' 
+ env_label + '="$env",'+server_label+'=~"$sr_server"})', - legendFormat="{{"+server_label+"}}", + expr="sum without(area)(jvm_memory_bytes_used{" + + env_label + + '="$env",' + + server_label + + '=~"$sr_server"})', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", @@ -160,8 +177,12 @@ def dashboard(env_label='namespace',server_label='pod'): dataSource="${DS_PROMETHEUS}", targets=[ G.Target( - expr='sum without(gc)(irate(jvm_gc_collection_seconds_sum{' + env_label + '="$env",'+server_label+'=~"$sr_server"}[5m]))', - legendFormat="{{"+server_label+"}}", + expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" + + env_label + + '="$env",' + + server_label + + '=~"$sr_server"}[5m]))', + legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", @@ -170,9 +191,9 @@ def dashboard(env_label='namespace',server_label='pod'): gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=1), ), ] - + panels = healthcheck_panels + system_panels - + return G.Dashboard( title="Schema Registry cluster - v2", description="Overview of the Schema Registry cluster", @@ -191,6 +212,7 @@ def dashboard(env_label='namespace',server_label='pod'): refresh="30s", ).auto_panel_ids() + env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") dashboard = dashboard(env_label, server_label) diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py index 0e1b34d7..2925c76f 100644 --- a/grafana-dashboards/zookeeper-cluster.py +++ b/grafana-dashboards/zookeeper-cluster.py @@ -371,6 +371,7 @@ def dashboard(env_label="namespace", server_label="pod"): refresh="30s", ).auto_panel_ids() + env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") dashboard = dashboard(env_label, server_label) From ad70e848b0a4cd1174d704e439717b2fdf2a0611 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Mon, 11 Jul 2022 
17:03:11 +0100 Subject: [PATCH 21/28] feat: kafka consumer and producer dashboards --- grafana-dashboards/cfk/kafka-consumer.json | 4363 +++++++++++++++++ grafana-dashboards/cfk/kafka-producer.json | 4087 +++++++++++++++ .../default/kafka-consumer.json | 4363 +++++++++++++++++ .../default/kafka-producer.json | 4087 +++++++++++++++ grafana-dashboards/kafka-consumer.py | 1093 +++++ grafana-dashboards/kafka-producer.py | 1000 ++++ 6 files changed, 18993 insertions(+) create mode 100644 grafana-dashboards/cfk/kafka-consumer.json create mode 100644 grafana-dashboards/cfk/kafka-producer.json create mode 100644 grafana-dashboards/default/kafka-consumer.json create mode 100644 grafana-dashboards/default/kafka-producer.json create mode 100644 grafana-dashboards/kafka-consumer.py create mode 100644 grafana-dashboards/kafka-producer.py diff --git a/grafana-dashboards/cfk/kafka-consumer.json b/grafana-dashboards/cfk/kafka-consumer.json new file mode 100644 index 00000000..8f23a386 --- /dev/null +++ b/grafana-dashboards/cfk/kafka-consumer.json @@ -0,0 +1,4363 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka consumers", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Consumed Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, 
+ "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Lag", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_last_rebalance_seconds_ago{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Last Rebalance Seconds Ago", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"count(kafka_consumer_app_info{namespace=\"$env\", client_id=~\"$client_id\", version!=\"\", pod=~\"$server\"}) by (version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Versions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", 
+ "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cts" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + 
"targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Lag Max", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + 
} + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + 
"target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": 
null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_records_per_request_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per Request Avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": 
null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, 
+ "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + 
"displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, 
+ "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Response Time (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + 
"displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Last Heartbeat Seconds Ago", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + 
"datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Assigned Partitions", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Consumer group", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + 
"overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": 
"single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_metrics_io_ratio{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_select_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Select Rate", + "transformations": [], + "transparent": 
false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": 
null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" 
+ } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Response Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + 
"cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Consumed Rate per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, 
+ "timeShift": null, + "title": "Records Consumed Rate per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Size per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{topic}}", + "metric": "", + 
"refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per Request Avg. per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-consumer" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "server", + "options": [], + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"consumer\"},pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Client ID", + "multi": true, + "name": 
"client_id", + "options": [], + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"consumer\"},client_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Consumer - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/cfk/kafka-producer.json b/grafana-dashboards/cfk/kafka-producer.json new file mode 100644 index 00000000..09eff447 --- /dev/null +++ b/grafana-dashboards/cfk/kafka-producer.json @@ -0,0 +1,4087 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka producers", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { 
+ "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + 
"yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Error Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": 
null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Retry Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_producer_app_info{namespace=\"$env\", client_id=~\"$client_id\", version!=\"\", pod=~\"$server\"}) by 
(version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Versions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + 
} + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Metadata Age", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_request_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + 
"transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request in-flight", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per Request (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": 
{ + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Error Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" 
+ }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": 
"auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, 
+ "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Split Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + 
"viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Compression Rate (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": 
[], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_count{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_select_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Select Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": 
"single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 
10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + 
}, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + 
"tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Response Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + 
"y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Compression Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + 
"legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_producer_producer_topic_metrics_record_error_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Error Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-producer" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "server", + "options": [], + "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\"},pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { 
+ "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Client ID", + "multi": true, + "name": "client_id", + "options": [], + "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\"},client_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Producer - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/kafka-consumer.json b/grafana-dashboards/default/kafka-consumer.json new file mode 100644 index 00000000..f82e02b3 --- /dev/null +++ b/grafana-dashboards/default/kafka-consumer.json @@ -0,0 +1,4363 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka consumers", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Consumed Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + 
"custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Lag", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] + }, + 
"unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_last_rebalance_seconds_ago{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Last Rebalance Seconds Ago", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + 
"values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_consumer_app_info{env=\"$env\", client_id=~\"$client_id\", version!=\"\", hostname=~\"$server\"}) by (version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Versions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + 
"id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cts" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + 
"displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + 
"datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Lag Max", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + 
"datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, 
+ "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + 
"options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per Request Avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + 
"mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + 
"placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + 
"id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": 
{ + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Response Time (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": 
null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Last Heartbeat Seconds Ago", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + 
"calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Assigned Partitions", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Consumer group", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + 
"type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, 
+ "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": 
"bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Select Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO time avg.", + 
"transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + 
"title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": 
"", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], 
+ "timeFrom": null, + "timeShift": null, + "title": "Response Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + 
"mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Consumed Rate per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Consumed Rate per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 12 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Size per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per Request Avg. per Topic", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-consumer" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "server", + "options": [], + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\"},hostname)", + "refresh": 1, + 
"regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Client ID", + "multi": true, + "name": "client_id", + "options": [], + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\"},client_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Consumer - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/kafka-producer.json b/grafana-dashboards/default/kafka-producer.json new file mode 100644 index 00000000..66acd77d --- /dev/null +++ b/grafana-dashboards/default/kafka-producer.json @@ -0,0 +1,4087 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka producers", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + 
"x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Error Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + 
"line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Retry Rate", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": 
"value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_producer_app_info{env=\"$env\", client_id=~\"$client_id\", version!=\"\", hostname=~\"$server\"}) by (version)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Versions", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": 
"binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": 
{ + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Metadata Age", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_producer_producer_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request in-flight", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records 
per Request (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + 
"error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Error Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Queue Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": 
false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce 
Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 
2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Batch Split Rate", + 
"transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Compression Rate (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + 
"timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", 
+ "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + 
"target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + 
"cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 7 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Select Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + 
"mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ns" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": 
null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": 
false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + 
"showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": 
[] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + 
"scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Response Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Compression Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 42, + 
"interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Error Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, + "type": "row" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-producer" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": 
import os
import grafanalib.core as G


def dashboard(env_label="namespace", server_label="pod"):
    """Build the "Kafka Consumer - v2" Grafana dashboard.

    The dashboard is made of an always-visible "Overview" row of stat panels
    followed by five collapsed rows of time series: "Performance",
    "Consumer group", "Connections", "Per Broker" and "Per Topic".

    Args:
        env_label: Name of the Prometheus label that is matched against the
            ``$env`` template variable in every query.
        server_label: Name of the Prometheus label that is matched against
            the ``$server`` template variable and shown in series legends.

    Returns:
        The ``G.Dashboard`` object, with panel ids assigned by
        ``auto_panel_ids()``.
    """
    default_height = 5
    stat_width = 4
    ts_width = 8
    topk = "10"
    datasource = "${DS_PROMETHEUS}"

    # Metric-name prefixes shared by the panels below.
    fetch = "kafka_consumer_consumer_fetch_manager_metrics_"
    coordinator = "kafka_consumer_consumer_coordinator_metrics_"
    client = "kafka_consumer_consumer_metrics_"
    node = "kafka_consumer_consumer_node_metrics_"

    # Legend prefix used by every per-client series.
    client_legend = "{{client_id}}@{{" + server_label + "}}"

    templating = G.Templating(
        list=[
            G.Template(
                name="env",
                label="Environment",
                dataSource="Prometheus",
                query="label_values(" + env_label + ")",
            ),
            G.Template(
                name="server",
                label="Server",
                dataSource="Prometheus",
                query="label_values(" + client + "incoming_byte_rate{"
                + env_label
                + '="$env", client_type="consumer"},'
                + server_label
                + ")",
                multi=True,
                includeAll=True,
            ),
            G.Template(
                name="client_id",
                label="Client ID",
                dataSource="Prometheus",
                query="label_values(" + client + "incoming_byte_rate{"
                + env_label
                + '="$env", client_type="consumer"},client_id)',
                multi=True,
                includeAll=True,
            ),
        ]
    )

    def stat_expr(metric):
        """Return the Overview query for `metric`: top-k series with value > 0."""
        # client_type is "consumer" here so the stats select the same series
        # as the template queries above (it used to be "producer").
        return (
            "topk(" + topk + ", " + metric + "{"
            + env_label
            + '="$env", client_type="consumer", client_id=~"$client_id", '
            + server_label
            + '=~"$server"} > 0)'
        )

    def series_expr(metric):
        """Return the time-series query for `metric`: top-k matching series."""
        return (
            "topk(" + topk + "," + metric + "{"
            + env_label
            + '="$env",client_id=~"$client_id", '
            + server_label
            + '=~"$server"})'
        )

    def single(metric, legend_suffix=""):
        """Describe one target as a list of (metric, legend suffix) pairs."""
        return [(metric, legend_suffix)]

    def avg_max(metric, legend_suffix=""):
        """Describe the `<metric>_avg` / `<metric>_max` pair of targets."""
        return [
            (metric + "_avg", legend_suffix + " (avg.)"),
            (metric + "_max", legend_suffix + " (max.)"),
        ]

    def stat(title, expr, column, thresholds, legend=client_legend, **options):
        """Build one Overview stat panel placed in grid column `column`."""
        return G.Stat(
            title=title,
            dataSource=datasource,
            targets=[G.Target(expr=expr, legendFormat=legend)],
            reduceCalc="last",
            thresholds=thresholds,
            gridPos=G.GridPos(
                h=default_height, w=stat_width, x=stat_width * column, y=0
            ),
            **options
        )

    def timeseries(title, series, column, y, unit=None):
        """Build one time-series panel with a table legend.

        `series` is a list of (metric, legend suffix) pairs, one per target.
        `unit` is only forwarded when given, so panels without a unit keep
        grafanalib's default.
        """
        options = {} if unit is None else {"unit": unit}
        return G.TimeSeries(
            title=title,
            dataSource=datasource,
            targets=[
                G.Target(
                    expr=series_expr(metric),
                    legendFormat=client_legend + suffix,
                )
                for metric, suffix in series
            ],
            legendDisplayMode="table",
            legendCalcs=["max", "mean", "last"],
            gridPos=G.GridPos(
                h=default_height * 2, w=ts_width, x=ts_width * column, y=y
            ),
            **options
        )

    def collapsed_row(title, y, inner):
        """Build a collapsed row at grid line `y` holding the `inner` panels."""
        return G.RowPanel(
            title=title,
            gridPos=G.GridPos(h=1, w=24, x=0, y=y),
            collapsed=True,
            panels=inner,
        )

    overview_base = 0
    overview_panels = [
        G.RowPanel(
            title="Overview",
            gridPos=G.GridPos(h=1, w=24, x=0, y=0),
        ),
        stat(
            "Record Consumed Rate",
            stat_expr(fetch + "records_consumed_rate"),
            0,
            [G.Threshold(index=0, value=0.0, color="blue")],
        ),
        # Uses the lag metric; this stat used to repeat records_consumed_rate.
        stat(
            "Records Lag",
            stat_expr(fetch + "records_lag_max"),
            1,
            [G.Threshold(index=0, value=0.0, color="blue")],
        ),
        stat(
            "Last Rebalance Seconds Ago",
            stat_expr(coordinator + "last_rebalance_seconds_ago"),
            2,
            [
                G.Threshold(index=0, value=0.0, color="green"),
                G.Threshold(index=1, value=1.0, color="yellow"),
                G.Threshold(index=2, value=10.0, color="red"),
            ],
            format="s",
        ),
        stat(
            "Versions",
            "count(kafka_consumer_app_info{"
            + env_label
            + '="$env", client_id=~"$client_id", version!="", '
            + server_label
            + '=~"$server"}) by (version)',
            3,
            [G.Threshold(index=0, value=0.0, color="blue")],
            legend="{{version}}",
        ),
    ]

    performance_base = overview_base + 1
    performance_inner = [
        timeseries("Bytes Consumed Rate", single(fetch + "bytes_consumed_rate"),
                   0, performance_base, unit="binBps"),
        # "cps" matches the per-topic panel for the same metric ("cts" is not
        # a Grafana unit id).
        timeseries("Records Consumed Rate", single(fetch + "records_consumed_rate"),
                   1, performance_base, unit="cps"),
        timeseries("Records Lag Max", single(fetch + "records_lag_max"),
                   2, performance_base),
        timeseries("Fetch Rate", single(fetch + "fetch_rate"),
                   0, performance_base + 1),
        timeseries("Fetch Latency", avg_max(fetch + "fetch_latency"),
                   1, performance_base + 1),
        timeseries("Fetch Size", avg_max(fetch + "fetch_size"),
                   2, performance_base + 1),
        timeseries("Fetch Throttle Time", avg_max(fetch + "fetch_throttle_time"),
                   0, performance_base + 2),
        # Same metric name as the "per Topic" variant below (the extra
        # "fetch_" in the old name matched nothing else in this file).
        timeseries("Records per Request Avg.", single(fetch + "records_per_request_avg"),
                   2, performance_base + 2),
    ]
    performance_panels = [
        collapsed_row("Performance", performance_base, performance_inner),
    ]

    # Panels of this row are positioned from group_base, not performance_base.
    group_base = performance_base + 3
    group_inner = [
        timeseries("Commit Rate", single(coordinator + "commit_rate"),
                   0, group_base),
        timeseries("Join Rate", single(coordinator + "join_rate"),
                   1, group_base),
        timeseries("Sync Rate", single(coordinator + "sync_rate"),
                   2, group_base),
        timeseries("Commit Latency", avg_max(coordinator + "commit_latency"),
                   0, group_base + 1, unit="ms"),
        timeseries("Join Time", avg_max(coordinator + "join_time"),
                   1, group_base + 1, unit="ms"),
        timeseries("Sync Time", avg_max(coordinator + "sync_time"),
                   2, group_base + 1, unit="ms"),
        timeseries("Heartbeat Rate", single(coordinator + "heartbeat_rate"),
                   0, group_base + 2),
        timeseries("Heartbeat Response Time (Max.)",
                   single(coordinator + "heartbeat_response_time_max"),
                   1, group_base + 2, unit="ms"),
        timeseries("Last Heartbeat Seconds Ago",
                   single(coordinator + "last_heartbeat_seconds_ago"),
                   2, group_base + 2, unit="s"),
        timeseries("Assigned Partitions", single(coordinator + "assigned_partitions"),
                   0, group_base + 3),
    ]
    group_panels = [
        collapsed_row("Consumer group", group_base, group_inner),
    ]

    connection_base = group_base + 4
    connection_inner = [
        timeseries("Connection Count", single(client + "connection_count"),
                   0, connection_base),
        timeseries("Connection Creation Rate", single(client + "connection_creation_rate"),
                   1, connection_base, unit="cps"),
        timeseries("Connection Close Rate", single(client + "connection_close_rate"),
                   2, connection_base, unit="cps"),
        # The two ratio panels deliberately carry no unit
        # (unit="percentunit" was left disabled).
        timeseries("IO ratio", single(client + "io_ratio"),
                   0, connection_base + 1),
        timeseries("IO wait ratio", single(client + "io_wait_ratio"),
                   1, connection_base + 1),
        timeseries("Select Rate", single(client + "select_rate"),
                   2, connection_base + 1, unit="cps"),
        timeseries("IO time avg.", single(client + "io_time_ns_avg"),
                   0, connection_base + 2, unit="ns"),
        timeseries("IO wait time avg.", single(client + "io_wait_time_ns_avg"),
                   1, connection_base + 2, unit="ns"),
    ]
    connection_panels = [
        collapsed_row("Connections", connection_base, connection_inner),
    ]

    from_node = " <- {{node_id}}"
    to_node = " -> {{node_id}}"
    per_broker_base = connection_base + 3
    per_broker_inner = [
        timeseries("Incoming Byte Rate", single(node + "incoming_byte_rate", from_node),
                   0, per_broker_base, unit="binBps"),
        timeseries("Outgoing Byte Rate", single(node + "outgoing_byte_rate", to_node),
                   1, per_broker_base, unit="binBps"),
        timeseries("Request Latency", avg_max(node + "request_latency", to_node),
                   2, per_broker_base, unit="ms"),
        timeseries("Request Rate", single(node + "request_rate", to_node),
                   0, per_broker_base + 1, unit="reqps"),
        timeseries("Response Rate", single(node + "response_rate", from_node),
                   1, per_broker_base + 1, unit="reqps"),
    ]
    per_broker_panels = [
        collapsed_row("Per Broker", per_broker_base, per_broker_inner),
    ]

    # Panels of this row are positioned from per_topic_base, not
    # per_broker_base, and each one gets its own grid cell.
    from_topic = " <- {{topic}}"
    per_topic_base = per_broker_base + 2
    per_topic_inner = [
        timeseries("Bytes Consumed Rate per Topic",
                   single(fetch + "bytes_consumed_rate", from_topic),
                   0, per_topic_base, unit="binBps"),
        timeseries("Records Consumed Rate per Topic",
                   single(fetch + "records_consumed_rate", from_topic),
                   1, per_topic_base, unit="cps"),
        timeseries("Fetch Size per Topic",
                   avg_max(fetch + "fetch_size", from_topic),
                   0, per_topic_base + 1, unit="bytes"),
        timeseries("Records per Request Avg. per Topic",
                   single(fetch + "records_per_request_avg", from_topic),
                   1, per_topic_base + 1),
    ]
    per_topic_panels = [
        collapsed_row("Per Topic", per_topic_base, per_topic_inner),
    ]

    panels = (
        overview_panels
        + performance_panels
        + group_panels
        + connection_panels
        + per_broker_panels
        + per_topic_panels
    )

    return G.Dashboard(
        title="Kafka Consumer - v2",
        description="Overview of the Kafka consumers",
        tags=["confluent", "kafka-client", "kafka-consumer"],
        inputs=[
            G.DataSourceInput(
                name="DS_PROMETHEUS",
                label="Prometheus",
                pluginId="prometheus",
                pluginName="Prometheus",
            )
        ],
        templating=templating,
        timezone="browser",
        panels=panels,
        refresh="30s",
    ).auto_panel_ids()


# Label names can be overridden through the environment; the fallbacks here
# ("env" / "hostname") differ from the function's own defaults.
env_label = os.environ.get("ENV_LABEL", "env")
server_label = os.environ.get("SERVER_LABEL", "hostname")
# Rebinds the module-level name `dashboard` from the builder function to the
# built dashboard object.
dashboard = dashboard(env_label, server_label)
b/grafana-dashboards/kafka-producer.py new file mode 100644 index 00000000..7fbddc5a --- /dev/null +++ b/grafana-dashboards/kafka-producer.py @@ -0,0 +1,1000 @@ +import os +import grafanalib.core as G + + +def dashboard(env_label="namespace", server_label="pod"): + default_height = 5 + stat_width = 4 + ts_width = 8 + + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values(" + env_label + ")", + ), + G.Template( + name="server", + label="Server", + dataSource="Prometheus", + query="label_values(kafka_producer_producer_metrics_record_retry_rate{" + + env_label + + '="$env", client_type="producer"},' + + server_label + + ")", + multi=True, + includeAll=True, + ), + G.Template( + name="client_id", + label="Client ID", + dataSource="Prometheus", + query="label_values(kafka_producer_producer_metrics_record_retry_rate{" + + env_label + + '="$env", client_type="producer"},client_id)', + multi=True, + includeAll=True, + ), + ] + ) + + topk = "10" + + overview_panels = [ + G.RowPanel( + title="Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=0), + ), + G.Stat( + title="Record Send Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_producer_producer_metrics_record_send_rate{" + + env_label + + '="$env", client_type="producer", client_id=~"$client_id", ' + + server_label + + '=~"$server"} > 0)', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), + ), + G.Stat( + title="Error Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_producer_producer_metrics_record_error_rate{" + + env_label + + '="$env", client_type="producer", client_id=~"$client_id", ' + + server_label + + '=~"$server"} > 0)', + 
legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), + ), + G.Stat( + title="Retry Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_producer_producer_metrics_record_retry_rate{" + + env_label + + '="$env", client_type="producer", client_id=~"$client_id", ' + + server_label + + '=~"$server"} > 0)', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=10.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), + ), + G.Stat( + title="Versions", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="count(kafka_producer_app_info{" + + env_label + + '="$env", client_id=~"$client_id", version!="", ' + + server_label + + '=~"$server"}) by (version)', + legendFormat="{{version}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="blue"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), + ), + ] + + performance_base = 1 + performance_inner = [ + G.TimeSeries( + title="Incoming Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_incoming_byte_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Outgoing 
Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_outgoing_byte_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Metadata Age", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_metadata_age{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="s", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 0 + ), + ), + G.TimeSeries( + title="Request Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_request_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Request in-flight", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_requests_in_flight{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + 
h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Records per Request (avg.)", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_records_per_request_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 1 + ), + ), + G.TimeSeries( + title="Record Send Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_send_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 2 + ), + ), + G.TimeSeries( + title="Record Retry Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_retry_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 2 + ), + ), + G.TimeSeries( + title="Record Error Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_error_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label 
+ "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 2 + ), + ), + G.TimeSeries( + title="Record Size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_size_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_size_max{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Record Queue Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_queue_time_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_record_queue_time_max{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Produce Throttle Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_produce_throttle_time_avg{" + + 
env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_produce_throttle_time_max{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Batch Size", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_batch_size_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_batch_size_max{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="bytes", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 4 + ), + ), + G.TimeSeries( + title="Batch Split Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_batch_split_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 4 + ), + ), + G.TimeSeries( + title="Compression Rate (avg.)", + 
dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_compression_rate_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 4 + ), + ), + ] + performance_panels = [ + G.RowPanel( + title="Performance", + gridPos=G.GridPos(h=1, w=24, x=0, y=performance_base), + collapsed=True, + panels=performance_inner, + ), + ] + + connection_base = performance_base + 5 + connection_inner = [ + G.TimeSeries( + title="Connection Count", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_connection_count{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + ), + ), + G.TimeSeries( + title="Connection Creation Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_connection_creation_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + ), + ), + G.TimeSeries( + title="Connection Close Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_connection_close_rate{" + + env_label + + 
'="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + ), + ), + G.TimeSeries( + title="IO ratio", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_io_ratio{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + # unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="IO wait ratio", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_io_wait_ratio{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + # unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="Select Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_select_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=connection_base + 1 + ), + ), + G.TimeSeries( + title="IO time avg.", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + 
expr="topk(" + + topk + + ",kafka_producer_producer_metrics_io_time_ns_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ns", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 2 + ), + ), + G.TimeSeries( + title="IO wait time avg.", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_metrics_io_wait_time_ns_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ns", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 2 + ), + ), + ] + connection_panels = [ + G.RowPanel( + title="Connections", + gridPos=G.GridPos(h=1, w=24, x=0, y=connection_base), + collapsed=True, + panels=connection_inner, + ), + ] + + per_broker_base = connection_base + 2 + per_broker_inner = [ + G.TimeSeries( + title="Incoming Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_incoming_byte_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + + server_label + + "}} <- {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_broker_base + ), + ), + G.TimeSeries( + title="Outgoing Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_outgoing_byte_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + 
server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + ), + ), + G.TimeSeries( + title="Request Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_request_latency_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_request_latency_max{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=per_broker_base + ), + ), + G.TimeSeries( + title="Request Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_request_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + + server_label + + "}} -> {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_broker_base + 1 + ), + ), + G.TimeSeries( + title="Response Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_node_metrics_response_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + + 
server_label + + "}} <- {{node_id}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + 1 + ), + ), + ] + per_broker_panels = [ + G.RowPanel( + title="Per Broker", + gridPos=G.GridPos(h=1, w=24, x=0, y=per_broker_base), + collapsed=True, + panels=per_broker_inner, + ), + ] + + per_topic_base = per_broker_base + 2 + per_topic_inner = [ + G.TimeSeries( + title="Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_byte_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_topic_base + ), + ), + G.TimeSeries( + title="Compression Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_compression_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percentunit", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_topic_base + ), + ), + G.TimeSeries( + title="Record Send Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_record_send_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + 
gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=per_topic_base + ), + ), + G.TimeSeries( + title="Record Retry Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_record_retry_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=per_topic_base + 1 + ), + ), + G.TimeSeries( + title="Record Error Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_producer_producer_topic_metrics_record_error_rate{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="cps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_topic_base + 1 + ), + ), + ] + per_topic_panels = [ + G.RowPanel( + title="Per Topic", + gridPos=G.GridPos(h=1, w=24, x=0, y=per_topic_base), + collapsed=True, + panels=per_topic_inner, + ), + ] + + panels = ( + overview_panels + + performance_panels + + connection_panels + + per_broker_panels + + per_topic_panels + ) + + return G.Dashboard( + title="Kafka Producer - v2", + description="Overview of the Kafka producers", + tags=["confluent", "kafka-client", "kafka-producer"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", 
"hostname") +dashboard = dashboard(env_label, server_label) From 873c092ac69349743f195f70c94e91b8468d411d Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Mon, 11 Jul 2022 18:30:29 +0100 Subject: [PATCH 22/28] feat: kafka quotas dashboard --- grafana-dashboards/Makefile | 1 + grafana-dashboards/cfk/kafka-consumer.json | 483 ++++++++--- grafana-dashboards/cfk/kafka-quotas.json | 779 ++++++++++++++++++ .../default/kafka-consumer.json | 483 ++++++++--- grafana-dashboards/default/kafka-quotas.json | 779 ++++++++++++++++++ grafana-dashboards/kafka-consumer.py | 114 ++- grafana-dashboards/kafka-quotas.py | 220 +++++ 7 files changed, 2569 insertions(+), 290 deletions(-) create mode 100644 grafana-dashboards/cfk/kafka-quotas.json create mode 100644 grafana-dashboards/default/kafka-quotas.json create mode 100644 grafana-dashboards/kafka-quotas.py diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index 9ac68244..4fbffb0d 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -22,6 +22,7 @@ dashboards: @generate-dashboard ksqldb-cluster.py -o $(OUTPUT_DIR)/ksqldb-cluster.json @generate-dashboard kafka-producer.py -o $(OUTPUT_DIR)/kafka-producer.json @generate-dashboard kafka-consumer.py -o $(OUTPUT_DIR)/kafka-consumer.json + @generate-dashboard kafka-quotas.py -o $(OUTPUT_DIR)/kafka-quotas.json cfk: OUTPUT_DIR=cfk cfk: export ENV_LABEL=namespace diff --git a/grafana-dashboards/cfk/kafka-consumer.json b/grafana-dashboards/cfk/kafka-consumer.json index 8f23a386..669adbdf 100644 --- a/grafana-dashboards/cfk/kafka-consumer.json +++ b/grafana-dashboards/cfk/kafka-consumer.json @@ -267,7 +267,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -305,7 +305,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_last_rebalance_seconds_ago{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "expr": "topk(10, 
kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -320,7 +320,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Last Rebalance Seconds Ago", + "title": "Rebalance Rate per hour", "transformations": [], "transparent": false, "type": "stat" @@ -341,12 +341,20 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", "value": "null", "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -385,6 +393,89 @@ "repeat": null, "repeatDirection": null, "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Failed Rebalance Rate per hour", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { "datasource": null, @@ -431,7 +522,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 6, + "id": 7, "interval": null, "links": [], "maxDataPoints": 100, @@ -492,7 +583,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 7, + "id": 8, "interval": null, "links": [], "maxDataPoints": 100, @@ -592,7 +683,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 8, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, @@ -692,7 +783,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 9, + "id": 10, "interval": null, "links": [], "maxDataPoints": 100, @@ -792,7 +883,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 10, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, @@ -892,7 +983,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 11, + "id": 12, "interval": null, "links": [], "maxDataPoints": 100, @@ -1006,7 +1097,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 12, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, @@ -1120,7 +1211,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 13, + "id": 14, "interval": null, "links": [], "maxDataPoints": 100, @@ -1179,106 +1270,6 @@ "transformations": [], "transparent": false, "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - 
"legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_records_per_request_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Records per Request Avg.", - "transformations": [], - "transparent": false, - "type": "timeseries" } ], "repeat": null, @@ -2341,6 +2332,234 @@ "repeat": null, "repeatDirection": null, "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Rate Per Hour", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + 
"hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{pod}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ 
{ "datasource": null, @@ -2399,7 +2618,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 26, + "id": 28, "interval": null, "links": [], "maxDataPoints": 100, @@ -2460,7 +2679,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 27, + "id": 29, "interval": null, "links": [], "maxDataPoints": 100, @@ -2560,7 +2779,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 28, + "id": 30, "interval": null, "links": [], "maxDataPoints": 100, @@ -2660,7 +2879,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 29, + "id": 31, "interval": null, "links": [], "maxDataPoints": 100, @@ -2760,7 +2979,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 30, + "id": 32, "interval": null, "links": [], "maxDataPoints": 100, @@ -2860,7 +3079,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 31, + "id": 33, "interval": null, "links": [], "maxDataPoints": 100, @@ -2960,7 +3179,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 32, + "id": 34, "interval": null, "links": [], "maxDataPoints": 100, @@ -3060,7 +3279,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 33, + "id": 35, "interval": null, "links": [], "maxDataPoints": 100, @@ -3160,7 +3379,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 34, + "id": 36, "interval": null, "links": [], "maxDataPoints": 100, @@ -3241,7 +3460,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 35, + "id": 37, "interval": null, "links": [], "maxDataPoints": 100, @@ -3302,7 +3521,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 36, + "id": 38, "interval": null, "links": [], "maxDataPoints": 100, @@ -3402,7 +3621,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 37, + "id": 39, "interval": null, "links": [], "maxDataPoints": 100, @@ -3502,7 +3721,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 38, + "id": 40, "interval": null, "links": [], "maxDataPoints": 100, @@ -3616,7 +3835,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 39, + "id": 41, 
"interval": null, "links": [], "maxDataPoints": 100, @@ -3716,7 +3935,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 40, + "id": 42, "interval": null, "links": [], "maxDataPoints": 100, @@ -3797,7 +4016,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 41, + "id": 43, "interval": null, "links": [], "maxDataPoints": 100, @@ -3858,7 +4077,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 42, + "id": 44, "interval": null, "links": [], "maxDataPoints": 100, @@ -3958,7 +4177,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 43, + "id": 45, "interval": null, "links": [], "maxDataPoints": 100, @@ -4058,7 +4277,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 44, + "id": 46, "interval": null, "links": [], "maxDataPoints": 100, @@ -4172,7 +4391,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 45, + "id": 47, "interval": null, "links": [], "maxDataPoints": 100, diff --git a/grafana-dashboards/cfk/kafka-quotas.json b/grafana-dashboards/cfk/kafka-quotas.json new file mode 100644 index 00000000..8895d515 --- /dev/null +++ b/grafana-dashboards/cfk/kafka-quotas.json @@ -0,0 +1,779 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka quotas", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", +
"lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_produce_byte_rate{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 8, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_fetch_byte_rate{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percent" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 12, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_request_request_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_produce_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, 
+ "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_fetch_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_server_request_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-quota" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Broker", + "multi": true, + "name": "broker", + "options": [], + "query": "label_values(kafka_server_produce_byte_rate{namespace=\"$env\"},pod)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + 
"label": "User", + "multi": true, + "name": "user", + "options": [], + "query": "label_values(user)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Client ID", + "multi": true, + "name": "client_id", + "options": [], + "query": "label_values(client_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Quotas - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/default/kafka-consumer.json b/grafana-dashboards/default/kafka-consumer.json index f82e02b3..d0305287 100644 --- a/grafana-dashboards/default/kafka-consumer.json +++ b/grafana-dashboards/default/kafka-consumer.json @@ -267,7 +267,7 @@ } ] }, - "unit": "s" + "unit": "none" }, "overrides": [] }, @@ -305,7 +305,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_last_rebalance_seconds_ago{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -320,7 +320,7 @@ ], "timeFrom": null, "timeShift": 
null, - "title": "Last Rebalance Seconds Ago", + "title": "Rebalance Rate per hour", "transformations": [], "transparent": false, "type": "stat" @@ -341,12 +341,20 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", "value": "null", "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -385,6 +393,89 @@ "repeat": null, "repeatDirection": null, "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Failed Rebalance Rate per hour", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + 
}, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { "datasource": null, @@ -431,7 +522,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 6, + "id": 7, "interval": null, "links": [], "maxDataPoints": 100, @@ -492,7 +583,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 7, + "id": 8, "interval": null, "links": [], "maxDataPoints": 100, @@ -592,7 +683,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 8, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, @@ -692,7 +783,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 9, + "id": 10, "interval": null, "links": [], "maxDataPoints": 100, @@ -792,7 +883,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 10, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, @@ -892,7 +983,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 11, + "id": 12, "interval": null, "links": [], "maxDataPoints": 100, @@ -1006,7 +1097,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 12, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, @@ -1120,7 +1211,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 13, + "id": 14, "interval": null, "links": [], "maxDataPoints": 100, @@ -1179,106 +1270,6 @@ "transformations": [], "transparent": false, "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" 
- } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Records per Request Avg.", - "transformations": [], - "transparent": false, - "type": "timeseries" } ], "repeat": null, @@ -2341,6 +2332,234 @@ "repeat": null, "repeatDirection": null, "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"{{client_id}}@{{hostname}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Rate Per Hour", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { "datasource": null, @@ -2399,7 +2618,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 26, + "id": 28, "interval": null, "links": [], "maxDataPoints": 100, @@ -2460,7 +2679,7 @@ }, "height": null, 
"hideTimeOverride": false, - "id": 27, + "id": 29, "interval": null, "links": [], "maxDataPoints": 100, @@ -2560,7 +2779,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 28, + "id": 30, "interval": null, "links": [], "maxDataPoints": 100, @@ -2660,7 +2879,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 29, + "id": 31, "interval": null, "links": [], "maxDataPoints": 100, @@ -2760,7 +2979,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 30, + "id": 32, "interval": null, "links": [], "maxDataPoints": 100, @@ -2860,7 +3079,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 31, + "id": 33, "interval": null, "links": [], "maxDataPoints": 100, @@ -2960,7 +3179,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 32, + "id": 34, "interval": null, "links": [], "maxDataPoints": 100, @@ -3060,7 +3279,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 33, + "id": 35, "interval": null, "links": [], "maxDataPoints": 100, @@ -3160,7 +3379,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 34, + "id": 36, "interval": null, "links": [], "maxDataPoints": 100, @@ -3241,7 +3460,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 35, + "id": 37, "interval": null, "links": [], "maxDataPoints": 100, @@ -3302,7 +3521,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 36, + "id": 38, "interval": null, "links": [], "maxDataPoints": 100, @@ -3402,7 +3621,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 37, + "id": 39, "interval": null, "links": [], "maxDataPoints": 100, @@ -3502,7 +3721,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 38, + "id": 40, "interval": null, "links": [], "maxDataPoints": 100, @@ -3616,7 +3835,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 39, + "id": 41, "interval": null, "links": [], "maxDataPoints": 100, @@ -3716,7 +3935,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 40, + "id": 42, "interval": null, "links": [], "maxDataPoints": 100, @@ -3797,7 
+4016,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 41, + "id": 43, "interval": null, "links": [], "maxDataPoints": 100, @@ -3858,7 +4077,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 42, + "id": 44, "interval": null, "links": [], "maxDataPoints": 100, @@ -3958,7 +4177,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 43, + "id": 45, "interval": null, "links": [], "maxDataPoints": 100, @@ -4058,7 +4277,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 44, + "id": 46, "interval": null, "links": [], "maxDataPoints": 100, @@ -4172,7 +4391,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 45, + "id": 47, "interval": null, "links": [], "maxDataPoints": 100, diff --git a/grafana-dashboards/default/kafka-quotas.json b/grafana-dashboards/default/kafka-quotas.json new file mode 100644 index 00000000..b22c758b --- /dev/null +++ b/grafana-dashboards/default/kafka-quotas.json @@ -0,0 +1,779 @@ +{ + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], + "annotations": { + "list": [] + }, + "description": "Overview of the Kafka quotas", + "editable": true, + "gnetId": null, + "hideControls": false, + "id": null, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + },
+ "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 0, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_produce_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 8, + 
"y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_fetch_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percent" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 16, + "y": 0 + }, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + 
"options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_request_request_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + 
"repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_produce_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Produce Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"topk(10,kafka_server_fetch_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 12, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_server_request_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, + "style": "dark", + "tags": [ + "confluent", + "kafka-client", + "kafka-quota" + ], + "templating": { + "list": [ + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", + "options": [], + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Broker", + "multi": true, + "name": "broker", + "options": [], + "query": "label_values(kafka_server_produce_byte_rate{env=\"$env\"},hostname)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "User", + "multi": true, + "name": "user", + "options": [], + "query": "label_values(user)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + 
"tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Client ID", + "multi": true, + "name": "client_id", + "options": [], + "query": "label_values(client_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka Quotas - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/kafka-consumer.py b/grafana-dashboards/kafka-consumer.py index 620815d1..3a4a94cc 100644 --- a/grafana-dashboards/kafka-consumer.py +++ b/grafana-dashboards/kafka-consumer.py @@ -91,13 +91,13 @@ def dashboard(env_label="namespace", server_label="pod"): gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), ), G.Stat( - title="Last Rebalance Seconds Ago", + title="Rebalance Rate per hour", dataSource="${DS_PROMETHEUS}", targets=[ G.Target( expr="topk(" + topk - + ", kafka_consumer_consumer_coordinator_metrics_last_rebalance_seconds_ago{" + + ", kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{" + env_label + '="$env", client_type="producer", client_id=~"$client_id", ' + server_label @@ -106,7 +106,6 @@ def dashboard(env_label="namespace", server_label="pod"): ), ], reduceCalc="last", - format="s", thresholds=[ G.Threshold(index=0, value=0.0, color="green"), G.Threshold(index=1, value=1.0, color="yellow"), @@ -114,6 +113,28 @@ def dashboard(env_label="namespace", 
server_label="pod"): ], gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), ), + G.Stat( + title="Failed Rebalance Rate per hour", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ", kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{" + + env_label + + '="$env", client_type="producer", client_id=~"$client_id", ' + + server_label + + '=~"$server"} > 0)', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + ], + reduceCalc="last", + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="red"), + ], + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), + ), G.Stat( title="Versions", dataSource="${DS_PROMETHEUS}", @@ -131,7 +152,7 @@ def dashboard(env_label="namespace", server_label="pod"): thresholds=[ G.Threshold(index=0, value=0.0, color="blue"), ], - gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=0), + gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=0), ), ] @@ -318,27 +339,6 @@ def dashboard(env_label="namespace", server_label="pod"): h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 2 ), ), - G.TimeSeries( - title="Records per Request Avg.", - dataSource="${DS_PROMETHEUS}", - targets=[ - G.Target( - expr="topk(" - + topk - + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_records_per_request_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', - legendFormat="{{client_id}}@{{" + server_label + "}}", - ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 2 - ), - ), ] performance_panels = [ G.RowPanel( @@ -578,6 +578,68 @@ def dashboard(env_label="namespace", server_label="pod"): ), ), + G.TimeSeries( + title="Rebalance Rate Per Hour", + dataSource="${DS_PROMETHEUS}", + 
targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}}", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (failed)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 3 + ), + ), + G.TimeSeries( + title="Rebalance Latency", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", + ), + G.Target( + expr="topk(" + + topk + + ",kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{" + + env_label + + '="$env",client_id=~"$client_id", ' + + server_label + + '=~"$server"})', + legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=performance_base + 3 + ), + ), G.TimeSeries( title="Assigned Partitions", dataSource="${DS_PROMETHEUS}", @@ -596,7 +658,7 @@ def dashboard(env_label="namespace", server_label="pod"): legendDisplayMode="table", legendCalcs=["max", "mean", "last"], gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=performance_base + 3 + h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 3 ), ), ] diff --git a/grafana-dashboards/kafka-quotas.py 
b/grafana-dashboards/kafka-quotas.py new file mode 100644 index 00000000..251f768a --- /dev/null +++ b/grafana-dashboards/kafka-quotas.py @@ -0,0 +1,220 @@ +import os +import grafanalib.core as G + + +def dashboard(env_label="namespace", server_label="pod"): + default_height = 6 + ts_width = 8 + + templating = G.Templating( + list=[ + G.Template( + name="env", + label="Environment", + dataSource="Prometheus", + query="label_values(" + env_label + ")", + ), + G.Template( + name="broker", + label="Broker", + dataSource="Prometheus", + query="label_values(kafka_server_produce_byte_rate{" + + env_label + + '="$env"},' + + server_label + + ")", + multi=True, + includeAll=True, + ), + G.Template( + name="user", + label="User", + dataSource="Prometheus", + query="label_values(user)", + multi=True, + includeAll=True, + ), + G.Template( + name="client_id", + label="Client ID", + dataSource="Prometheus", + query="label_values(client_id)", + multi=True, + includeAll=True, + ), + ] + ) + + topk = "10" + + panels = [ + G.TimeSeries( + title="Produce Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_produce_byte_rate{" + + env_label + + '="$env",user=~"$user",client_id=~"$client_id", ' + + server_label + + '=~"$broker"})', + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=0 + ), + ), + G.TimeSeries( + title="Fetch Byte Rate", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_fetch_byte_rate{" + + env_label + + '="$env",user=~"$user",client_id=~"$client_id", ' + + server_label + + '=~"$broker"})', + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", 
"mean", "last"], + unit="binBps", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=0 + ), + ), + G.TimeSeries( + title="Request Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_request_request_time{" + + env_label + + '="$env",user=~"$user",client_id=~"$client_id", ' + + server_label + + '=~"$broker"})', + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="percent", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=0 + ), + ), + + + G.TimeSeries( + title="Produce Throttle Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_produce_throttle_time{" + + env_label + + '="$env",user=~"$user",client_id=~"$client_id", ' + + server_label + + '=~"$broker"} > 0)', + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 0, y=1 + ), + ), + G.TimeSeries( + title="Fetch Throttle Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_fetch_throttle_time{" + + env_label + + '="$env",user=~"$user",client_id=~"$client_id", ' + + server_label + + '=~"$broker"} > 0)', + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 1, y=1 + ), + ), + G.TimeSeries( + title="Request Throttle Time", + dataSource="${DS_PROMETHEUS}", + targets=[ + G.Target( + expr="topk(" + + topk + + ",kafka_server_request_throttle_time{" + + env_label + + 
'="$env",user=~"$user",client_id=~"$client_id", ' + + server_label + + '=~"$broker"} > 0)', + legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + + server_label + + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=1 + ), + ), + ] + + return G.Dashboard( + title="Kafka Quotas - v2", + description="Overview of the Kafka quotass", + tags=["confluent", "kafka-client", "kafka-quota"], + inputs=[ + G.DataSourceInput( + name="DS_PROMETHEUS", + label="Prometheus", + pluginId="prometheus", + pluginName="Prometheus", + ) + ], + templating=templating, + timezone="browser", + panels=panels, + refresh="30s", + ).auto_panel_ids() + + +env_label = os.environ.get("ENV_LABEL", "env") +server_label = os.environ.get("SERVER_LABEL", "hostname") +dashboard = dashboard(env_label, server_label) From c08d69d05404b5f41d50dbbf9e201dcbd5d842d7 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Tue, 12 Jul 2022 14:18:45 +0100 Subject: [PATCH 23/28] feat: add docs and ds variable --- grafana-dashboards/Makefile | 2 + grafana-dashboards/README.md | 48 ++ .../cfk/confluent-platform.json | 143 ++-- grafana-dashboards/cfk/kafka-cluster.json | 130 ++-- .../cfk/kafka-connect-cluster.json | 92 +-- grafana-dashboards/cfk/kafka-consumer.json | 82 +-- grafana-dashboards/cfk/kafka-producer.json | 74 +- grafana-dashboards/cfk/kafka-quotas.json | 12 +- grafana-dashboards/cfk/kafka-topics.json | 16 +- grafana-dashboards/cfk/ksqldb-cluster.json | 86 +-- .../cfk/schema-registry-cluster.json | 28 +- grafana-dashboards/cfk/zookeeper-cluster.json | 95 ++- grafana-dashboards/confluent-platform.py | 329 ++++++--- .../default/confluent-platform.json | 189 +++-- grafana-dashboards/default/kafka-cluster.json | 224 +++--- .../default/kafka-connect-cluster.json | 92 +-- .../default/kafka-consumer.json | 82 +-- .../default/kafka-producer.json | 74 +- 
grafana-dashboards/default/kafka-quotas.json | 12 +- grafana-dashboards/default/kafka-topics.json | 16 +- .../default/ksqldb-cluster.json | 86 +-- .../default/schema-registry-cluster.json | 28 +- .../default/zookeeper-cluster.json | 95 ++- grafana-dashboards/kafka-cluster.py | 655 ++++++++++-------- grafana-dashboards/kafka-connect-cluster.py | 98 ++- grafana-dashboards/kafka-consumer.py | 100 ++- grafana-dashboards/kafka-producer.py | 74 +- grafana-dashboards/kafka-quotas.py | 38 +- grafana-dashboards/kafka-topics.py | 16 +- grafana-dashboards/ksqldb-cluster.py | 92 ++- grafana-dashboards/schema-registry-cluster.py | 27 +- grafana-dashboards/zookeeper-cluster.py | 67 +- 32 files changed, 1839 insertions(+), 1363 deletions(-) create mode 100644 grafana-dashboards/README.md diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index 4fbffb0d..0d542fd2 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -3,6 +3,7 @@ all: $(MAKE) cfk def: OUTPUT_DIR=default +def: export DATASOURCE=Prometheus def: export ENV_LABEL=env def: export SERVER_LABEL=hostname def: export KSQLDB_CLUSTER_LABEL=ksqldb_cluster_id @@ -25,6 +26,7 @@ dashboards: @generate-dashboard kafka-quotas.py -o $(OUTPUT_DIR)/kafka-quotas.json cfk: OUTPUT_DIR=cfk +cfk: export DATASOURCE=$${DS_PROMETHEUS} cfk: export ENV_LABEL=namespace cfk: export SERVER_LABEL=pod cfk: export KSQLDB_CLUSTER_LABEL=app diff --git a/grafana-dashboards/README.md b/grafana-dashboards/README.md new file mode 100644 index 00000000..2a4a6445 --- /dev/null +++ b/grafana-dashboards/README.md @@ -0,0 +1,48 @@ +# Grafana dashboards for Confluent Platform + +## Dashboards + +- Confluent Platform overview: main metrics from all Confluent components. +- Kafka Cluster: Kafka cluster health and performance metrics. +- Kafka Topics: Kafka topics throughput metrics. +- Schema Registry Cluster: Servers and subjects/schemas metrics. +- Kafka Connect Cluster: Connect workers and connectors metrics. 
+- ksqlDB Cluster: Servers and queries metrics. +- Kafka Producer: Kafka producer client metrics. +- Kafka Consumer: Kafka consumer client metrics. +- Kafka Quotas: Kafka quotas and throttling metrics. + +## How to build + +Install `grafanalib` library: + +```shell +pip3 install grafanalib +``` + +Run makefile: + +```shell +make +``` + +Running `make` generates the Grafana dashboard JSON files in the `default/` directory for Docker/VM-based deployments and in the `cfk/` directory for Confluent-for-Kubernetes-based deployments. + +## How to use + +Grafana dashboards expect the following labels: + +- Environment: + - Default: `env` + - CFK: `namespace` +- Server label: + - Default: `hostname` + - CFK: `pod` +- Cluster labels: + - Connect: + - Default: `kafka_connect_cluster_id` + - CFK: `app` + - ksqlDB: + - Default: `ksqldb_cluster_id` + - CFK: `app` + diff --git a/grafana-dashboards/cfk/confluent-platform.json b/grafana-dashboards/cfk/confluent-platform.json index 1605d89b..d94f8840 100644 --- a/grafana-dashboards/cfk/confluent-platform.json +++ b/grafana-dashboards/cfk/confluent-platform.json @@ -63,7 +63,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", "editable": true, "error": false, "fieldConfig": { @@ -162,7 +162,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", "editable": true, "error": false, "fieldConfig": { @@ -237,7 +237,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: Avg. 
number of ZNodes", + "title": "ZK: ZNodes (avg.)", "transformations": [], "transparent": false, "type": "stat" @@ -245,7 +245,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", "editable": true, "error": false, "fieldConfig": { @@ -258,16 +258,32 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", "value": "null", "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, @@ -305,7 +321,7 @@ "targets": [ { "datasource": null, - "expr": "sum(zookeeper_numaliveconnections{namespace=\"$env\"})", + "expr": "zookeeper_numaliveconnections{namespace=\"$env\"} / zookeeper_maxclientcnxnsperhost{namespace=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -320,7 +336,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: Sum of number of Alive Connections", + "title": "ZK: Connections used", "transformations": [], "transparent": false, "type": "stat" @@ -328,7 +344,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", "editable": true, "error": false, "fieldConfig": { @@ -411,7 +427,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows 
higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", "editable": true, "error": false, "fieldConfig": { @@ -448,7 +464,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] }, "unit": "" }, @@ -551,7 +592,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Count of brokers available (online).\n This value is referential and should not be used for alerting.\n ", "editable": true, "error": false, "fieldConfig": { @@ -634,7 +675,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", "editable": true, "error": false, "fieldConfig": { @@ -717,7 +758,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of Topic partitions across the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -792,7 +833,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitioenv", + "title": "Kafka: Sum of Partitions", "transformations": [], "transparent": false, "type": "stat" @@ -800,7 +841,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of Under-Replicated Partitions. 
This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", "editable": true, "error": false, "fieldConfig": { @@ -883,7 +924,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Under-Replicated (URP)", + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", "transformations": [], "transparent": false, "type": "stat" @@ -891,7 +932,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n It's recommended alerting when this values is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -974,7 +1015,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitioenv Under-MinISR", + "title": "Kafka: Sum of Under-MinISR Partitions", "transformations": [], "transparent": false, "type": "stat" @@ -982,7 +1023,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n It's recommended alerting when this values is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1065,7 +1106,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Offline", + "title": "Kafka: Sum of Offline Partitions", "transformations": [], "transparent": false, "type": "stat" @@ -1114,7 +1155,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Schema Registry online instances 
returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1213,7 +1254,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Average number of registered schemas across the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1276,7 +1317,7 @@ "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", @@ -1288,7 +1329,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Registered Schemas", + "title": "SR: Registered Schemas (avg.)", "transformations": [], "transparent": false, "type": "stat" @@ -1296,7 +1337,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Average number of schemas created, by type.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1371,7 +1412,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Created Schemas by Type", + "title": "SR: Schemas Created by Type (avg.)", "transformations": [], "transparent": false, "type": "stat" @@ -1379,7 +1420,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Average number of schemas deleted, by type.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1454,7 +1495,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Deleted Schemas by Type", + "title": "SR: Schemas Deleted by Type (avg.)", "transformations": [], "transparent": false, "type": "stat" @@ -1492,7 +1533,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Kafka Connect online workers returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1575,7 +1616,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of tasks 
deployed on Kafka Connect cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1658,7 +1699,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1749,7 +1790,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1840,7 +1881,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1853,7 +1894,7 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", @@ -1931,7 +1972,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Informative value. 
Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2056,7 +2097,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "ksqlDB online instances returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2116,7 +2157,7 @@ "targets": [ { "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2131,7 +2172,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "ksqlDB: Online Servers", + "title": "ksqlDB: Online instances", "transformations": [], "transparent": false, "type": "stat" @@ -2139,7 +2180,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of active queries deployed in the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2199,7 +2240,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2222,7 +2263,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2290,7 +2331,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\", 
app=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2313,7 +2354,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2381,7 +2422,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2404,7 +2445,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2472,7 +2513,7 @@ "targets": [ { "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2487,7 +2528,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Connect: Sum of Failed Queries", + "title": "Connect: Sum of Queries Failed", "transformations": [], "transparent": false, "type": "stat" @@ -2531,7 +2572,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, 
"label": "Environment", @@ -2558,7 +2599,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": true, "includeAll": false, "label": "Kafka Connect cluster", @@ -2585,7 +2626,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": true, "includeAll": false, "label": "ksqlDB cluster", diff --git a/grafana-dashboards/cfk/kafka-cluster.json b/grafana-dashboards/cfk/kafka-cluster.json index 9e2add0b..3db845de 100644 --- a/grafana-dashboards/cfk/kafka-cluster.json +++ b/grafana-dashboards/cfk/kafka-cluster.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Overview", + "title": "Cluster Overview", "transformations": [], "transparent": false, "type": "row" @@ -63,7 +63,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Count of brokers available (online).\n ", "editable": true, "error": false, "fieldConfig": { @@ -146,7 +146,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Active Controller broker.\n It should always be 1. 
If the value is different than 1, then it must be alerted for troubleshooting.\n ", "editable": true, "error": false, "fieldConfig": { @@ -229,7 +229,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "\n Number of partitions where the preferred replica is not the leader.\n Usually, this number is 0.\n Restarting nodes could cause this values to change, but when reassigning happens the value stabilize.\n ", "editable": true, "error": false, "fieldConfig": { @@ -304,7 +304,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Replica Imbalance", + "title": "Kafka: Sum of Preferred Replica Imbalance", "transformations": [], "transparent": false, "type": "stat" @@ -312,7 +312,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of topics in the cluster.", "editable": true, "error": false, "fieldConfig": { @@ -395,7 +395,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of requests per second rated over a 5 min. 
period.\n Gives an idea of the processing load in the cluster.", "editable": true, "error": false, "fieldConfig": { @@ -478,7 +478,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of log sizes per broker.\n This must be compared with the total storage space available in the brokers.", "editable": true, "error": false, "fieldConfig": { @@ -553,7 +553,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Logs Size", + "title": "Kafka: Log Size", "transformations": [], "transparent": false, "type": "stat" @@ -561,7 +561,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of Topic partitions across the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -644,7 +644,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 
1 minute), then it's recommended to alert.\n ", "editable": true, "error": false, "fieldConfig": { @@ -727,7 +727,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Under-Replicated (URP)", + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", "transformations": [], "transparent": false, "type": "stat" @@ -735,7 +735,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n We recommend alerting when this values is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -818,7 +818,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Under-MinISR", + "title": "Kafka: Sum of Under-MinISR Partitions", "transformations": [], "transparent": false, "type": "stat" @@ -826,7 +826,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n We recommend alerting when this values is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -909,7 +909,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Offline", + "title": "Kafka: Sum of Offline Partitions", "transformations": [], "transparent": false, "type": "stat" @@ -917,7 +917,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of bytes in per second rated over a 5 min. period.\n Gives an idea of the incoming throughput handle by the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1000,7 +1000,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of bytes out per second rated over a 5 min. 
period.\n Gives an idea of the outgoing throughput handle by the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1116,7 +1116,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "System", + "title": "System resources", "transformations": [], "transparent": false, "type": "row" @@ -1124,7 +1124,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", "editable": true, "error": false, "fieldConfig": { @@ -1224,7 +1224,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", "editable": true, "error": false, "fieldConfig": { @@ -1324,7 +1324,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of seconds used by Garbage Collection.", "editable": true, "error": false, "fieldConfig": { @@ -1425,7 +1425,7 @@ "cacheTimeout": null, "collapsed": true, "datasource": null, - "description": "Bytes in/out per second", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -1454,7 +1454,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of messages into topics per second, aggregated by sum without topic.", "editable": true, "error": false, "fieldConfig": { @@ -1554,7 +1554,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of bytes into topics per second, aggregated by sum without topic.", "editable": true, "error": false, "fieldConfig": { @@ -1654,7 +1654,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of bytes out of topics per second, aggregated by sum without 
topic.", "editable": true, "error": false, "fieldConfig": { @@ -1767,7 +1767,7 @@ "cacheTimeout": null, "collapsed": true, "datasource": null, - "description": "Internal thread pools usage", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -1796,7 +1796,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Percent of time the network thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", "editable": true, "error": false, "fieldConfig": { @@ -1896,7 +1896,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Percent of time the IO thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", "editable": true, "error": false, "fieldConfig": { @@ -2000,7 +2000,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Thread Utilization", + "title": "Thread utilization", "transformations": [], "transparent": false, "type": "row" @@ -2009,7 +2009,7 @@ "cacheTimeout": null, "collapsed": true, "datasource": null, - "description": "Sum of req/sec rates", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -2038,7 +2038,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Requests per second rated over a 5 minutes period.\n Includes API call and version.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2141,7 +2141,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Request Errors per second rated over a 5 minutes period.\n Includes API call and version.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2286,7 +2286,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of 
connections count across cluster by brokers", "editable": true, "error": false, "fieldConfig": { @@ -2386,7 +2386,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of rate of connections created across cluster by brokers", "editable": true, "error": false, "fieldConfig": { @@ -2486,7 +2486,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of rate of connections closed across cluster by brokers", "editable": true, "error": false, "fieldConfig": { @@ -2586,7 +2586,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of connections count across cluster by listeners", "editable": true, "error": false, "fieldConfig": { @@ -2686,7 +2686,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of rate of connections created across cluster by listener", "editable": true, "error": false, "fieldConfig": { @@ -2786,7 +2786,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of rate of connections closed across cluster by listener", "editable": true, "error": false, "fieldConfig": { @@ -2928,7 +2928,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Rate of ISR shrinks per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3028,7 +3028,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Rate of ISR expands per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3170,7 +3170,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": 
"Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3270,7 +3270,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3370,7 +3370,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3470,7 +3470,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3570,7 +3570,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3712,7 +3712,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3812,7 +3812,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3912,7 +3912,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At 
purgatory.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4012,7 +4012,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4112,7 +4112,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4254,7 +4254,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4354,7 +4354,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4454,7 +4454,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4554,7 +4554,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4654,7 +4654,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4796,7 +4796,7 @@ { 
"cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of groups managed by Broker", "editable": true, "error": false, "fieldConfig": { @@ -4896,7 +4896,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Number of stable groups managed by Broker", "editable": true, "error": false, "fieldConfig": { @@ -5046,7 +5046,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Number of Groups per Broker", + "title": "Number of Groups per Broker per Status", "transformations": [], "transparent": false, "type": "timeseries" @@ -5096,7 +5096,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of produce message conversions per second.\n This value increases when the broker receives produce messages from clients using older versions.\n ", "editable": true, "error": false, "fieldConfig": { @@ -5196,7 +5196,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of fetch message conversions per second.\n This value increases when the broker receives fetch messages from clients using older versions.\n ", "editable": true, "error": false, "fieldConfig": { @@ -5296,7 +5296,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "Sum of connections aggregated by client version and name.\n ", "editable": true, "error": false, "fieldConfig": { @@ -5428,7 +5428,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Environment", @@ -5455,7 +5455,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Broker", @@ -5482,7 +5482,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, 
"label": "Quantile", diff --git a/grafana-dashboards/cfk/kafka-connect-cluster.json b/grafana-dashboards/cfk/kafka-connect-cluster.json index 3f2b4934..c230452e 100644 --- a/grafana-dashboards/cfk/kafka-connect-cluster.json +++ b/grafana-dashboards/cfk/kafka-connect-cluster.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Kafka Connect online workers returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -145,8 +145,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -228,8 +228,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", "editable": true, "error": false, "fieldConfig": { @@ -319,8 +319,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", "editable": true, "error": false, "fieldConfig": { @@ -410,8 +410,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -424,7 +424,7 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", @@ 
-501,8 +501,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Informative value. Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", "editable": true, "error": false, "fieldConfig": { @@ -588,7 +588,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -808,7 +808,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -962,7 +962,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1062,7 +1062,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1203,8 +1203,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", "editable": true, "error": false, "fieldConfig": { @@ -1303,8 +1303,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", "editable": true, "error": false, "fieldConfig": { @@ -1403,8 +1403,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", "editable": true, "error": false, "fieldConfig": { @@ -1533,7 +1533,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1633,7 +1633,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1733,7 +1733,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1833,7 +1833,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1975,7 +1975,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2075,7 +2075,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2175,7 +2175,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2275,7 +2275,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2375,7 +2375,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2475,7 +2475,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": 
null, "editable": true, "error": false, @@ -2617,7 +2617,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2717,7 +2717,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2817,7 +2817,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2917,7 +2917,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3059,7 +3059,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3159,7 +3159,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3259,7 +3259,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3401,7 +3401,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3501,7 +3501,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3601,7 +3601,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3701,7 +3701,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3801,7 +3801,7 @@ }, { "cacheTimeout": null, - 
"datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3901,7 +3901,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/cfk/kafka-consumer.json b/grafana-dashboards/cfk/kafka-consumer.json index 669adbdf..342da669 100644 --- a/grafana-dashboards/cfk/kafka-consumer.json +++ b/grafana-dashboards/cfk/kafka-consumer.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -145,7 +145,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -228,7 +228,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -327,7 +327,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -418,7 +418,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -531,7 +531,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -631,7 +631,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -731,7 +731,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -831,7 +831,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": 
null, "editable": true, "error": false, @@ -931,7 +931,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1045,7 +1045,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1159,7 +1159,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1315,7 +1315,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1415,7 +1415,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1515,7 +1515,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1615,7 +1615,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1729,7 +1729,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1843,7 +1843,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1957,7 +1957,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2057,7 +2057,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2157,7 +2157,7 @@ }, { "cacheTimeout": null, - "datasource": 
"${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2257,7 +2257,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2371,7 +2371,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2485,7 +2485,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2627,7 +2627,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2727,7 +2727,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2827,7 +2827,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2927,7 +2927,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3027,7 +3027,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3127,7 +3127,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3227,7 +3227,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3327,7 +3327,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3469,7 
+3469,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3569,7 +3569,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3669,7 +3669,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3783,7 +3783,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3883,7 +3883,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -4025,7 +4025,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -4125,7 +4125,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -4225,7 +4225,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -4339,7 +4339,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/cfk/kafka-producer.json b/grafana-dashboards/cfk/kafka-producer.json index 09eff447..08afbea0 100644 --- a/grafana-dashboards/cfk/kafka-producer.json +++ b/grafana-dashboards/cfk/kafka-producer.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -145,7 +145,7 @@ }, { "cacheTimeout": null, - "datasource": 
"${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -236,7 +236,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -335,7 +335,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -448,7 +448,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -548,7 +548,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -648,7 +648,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -748,7 +748,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -848,7 +848,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -948,7 +948,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1048,7 +1048,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1148,7 +1148,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1248,7 +1248,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1348,7 +1348,7 @@ }, { 
"cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1462,7 +1462,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1576,7 +1576,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1690,7 +1690,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1804,7 +1804,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1904,7 +1904,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2046,7 +2046,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2146,7 +2146,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2246,7 +2246,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2346,7 +2346,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2446,7 +2446,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2546,7 +2546,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": 
true, "error": false, @@ -2646,7 +2646,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2746,7 +2746,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2888,7 +2888,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2988,7 +2988,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3088,7 +3088,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3202,7 +3202,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3302,7 +3302,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3444,7 +3444,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3544,7 +3544,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3644,7 +3644,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3744,7 +3744,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3844,7 +3844,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + 
"datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/cfk/kafka-quotas.json b/grafana-dashboards/cfk/kafka-quotas.json index 8895d515..08caa085 100644 --- a/grafana-dashboards/cfk/kafka-quotas.json +++ b/grafana-dashboards/cfk/kafka-quotas.json @@ -21,7 +21,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -121,7 +121,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -221,7 +221,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -321,7 +321,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -421,7 +421,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -521,7 +521,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/cfk/kafka-topics.json b/grafana-dashboards/cfk/kafka-topics.json index 19c187b5..20e0198c 100644 --- a/grafana-dashboards/cfk/kafka-topics.json +++ b/grafana-dashboards/cfk/kafka-topics.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -164,7 +164,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -266,7 +266,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", 
"description": null, "editable": true, "error": false, @@ -368,7 +368,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -470,7 +470,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -572,7 +572,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -708,7 +708,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -841,7 +841,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/cfk/ksqldb-cluster.json b/grafana-dashboards/cfk/ksqldb-cluster.json index 56b4f887..054efed1 100644 --- a/grafana-dashboards/cfk/ksqldb-cluster.json +++ b/grafana-dashboards/cfk/ksqldb-cluster.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "ksqlDB online instances returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -145,8 +145,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of active queries deployed in the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -228,8 +228,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", "editable": true, "error": 
false, "fieldConfig": { @@ -319,8 +319,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", "editable": true, "error": false, "fieldConfig": { @@ -410,8 +410,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -494,14 +494,14 @@ ], "timeFrom": null, "timeShift": null, - "title": "Connect: Sum of Failed Queries", + "title": "Connect: Sum of Queries Failed", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -601,7 +601,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -701,7 +701,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -842,8 +842,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", "editable": true, "error": false, "fieldConfig": { @@ -942,8 +942,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", "editable": true, "error": false, "fieldConfig": { @@ -1042,8 +1042,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", "editable": true, "error": false, "fieldConfig": { @@ -1172,7 +1172,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1272,7 +1272,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1372,7 +1372,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1472,7 +1472,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1572,7 +1572,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1672,7 +1672,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1772,7 +1772,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1872,7 +1872,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, 
"editable": true, "error": false, @@ -2014,7 +2014,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2114,7 +2114,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2214,7 +2214,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2314,7 +2314,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2414,7 +2414,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2514,7 +2514,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2614,7 +2614,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2714,7 +2714,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2814,7 +2814,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2914,7 +2914,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3014,7 +3014,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3114,7 +3114,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + 
"datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3214,7 +3214,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3314,7 +3314,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3414,7 +3414,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/cfk/schema-registry-cluster.json b/grafana-dashboards/cfk/schema-registry-cluster.json index 760aaf71..30f4c28e 100644 --- a/grafana-dashboards/cfk/schema-registry-cluster.json +++ b/grafana-dashboards/cfk/schema-registry-cluster.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Schema Registry online instances returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -161,8 +161,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average number of registered schemas across the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -225,7 +225,7 @@ "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", @@ -237,15 +237,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Registered Schemas", + "title": "SR: Registered Schemas (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average number of schemas created, by 
type.\n ", "editable": true, "error": false, "fieldConfig": { @@ -320,14 +320,14 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Created Schemas by Type", + "title": "SR: Created Schemas by Type (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -410,7 +410,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -534,7 +534,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -634,7 +634,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -734,7 +734,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/cfk/zookeeper-cluster.json b/grafana-dashboards/cfk/zookeeper-cluster.json index 235e17c0..3fa22d69 100644 --- a/grafana-dashboards/cfk/zookeeper-cluster.json +++ b/grafana-dashboards/cfk/zookeeper-cluster.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", "editable": true, "error": false, "fieldConfig": { @@ -161,8 +161,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", "editable": true, "error": false, "fieldConfig": { @@ -237,15 +237,15 @@ ], "timeFrom": null, 
"timeShift": null, - "title": "ZK: Avg. number of ZNodes", + "title": "ZK: ZNodes (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", "editable": true, "error": false, "fieldConfig": { @@ -258,16 +258,32 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", "value": "null", "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, @@ -305,7 +321,7 @@ "targets": [ { "datasource": null, - "expr": "sum(zookeeper_numaliveconnections{namespace=\"$env\"})", + "expr": "zookeeper_numaliveconnections{namespace=\"$env\"} / zookeeper_maxclientcnxnsperhost{namespace=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -320,15 +336,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: Sum of number of Alive Connections", + "title": "ZK: Connections used", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", "editable": true, "error": false, "fieldConfig": { @@ -410,8 +426,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + 
"description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", "editable": true, "error": false, "fieldConfig": { @@ -448,7 +464,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] }, "unit": "" }, @@ -550,7 +591,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -650,7 +691,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -750,7 +791,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -880,7 +921,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -980,7 +1021,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1080,7 +1121,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1222,7 +1263,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1322,7 +1363,7 @@ }, { 
"cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1424,7 +1465,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1526,7 +1567,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1628,7 +1669,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index 1de19e9f..af5aa005 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -3,29 +3,58 @@ def dashboard( + ds="Prometheus", env_label="namespace", server_label="pod", connect_cluster_label="app", ksqldb_cluster_label="app", ): + """ + Confluent Platform dashboard + It includes all Confluent components: + - Zookeeper + - Kafka + - Schema Registry + - Kafka Connect (repeated per cluster) + - ksqlDB (repeated per cluster) + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. 
+ + Invariants: + - Max width: 24 + """ + + # Default sizes default_height = 5 stat_width = 4 + # Queries + by_env = env_label + '="$env"' + + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="connect_cluster", label="Kafka Connect cluster", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_connect_connect_worker_metrics_connector_count{" - + env_label - + '="$env"}, ' + + by_env + + "}, " + connect_cluster_label + ")", hide=True, @@ -33,10 +62,10 @@ def dashboard( G.Template( name="ksqldb_cluster", label="ksqlDB cluster", - dataSource="Prometheus", + dataSource=ds, query="label_values(ksql_ksql_engine_query_stats_liveness_indicator{" - + env_label - + '="$env"}, ' + + by_env + + "}, " + ksqldb_cluster_label + ")", hide=True, @@ -44,6 +73,9 @@ def dashboard( ] ) + # Panel groups + ## Zookeeper panes: + ### When updating descriptions on these panels, also update descriptions in zookeeper-cluster.py zk_panels = [ G.RowPanel( title="Zookeeper cluster", @@ -51,10 +83,13 @@ def dashboard( ), G.Stat( title="ZK: Quorum Size", - dataSource="${DS_PROMETHEUS}", + description="""Quorum Size of Zookeeper ensemble. + Count Zookeeper servers with quorum size metric. + """, + dataSource=ds, targets=[ G.Target( - expr="count(zookeeper_status_quorumsize{" + env_label + '="$env"})', + expr="count(zookeeper_status_quorumsize{" + by_env + "})", ), ], reduceCalc="last", @@ -66,13 +101,14 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), ), G.Stat( - title="ZK: Avg. number of ZNodes", - dataSource="${DS_PROMETHEUS}", + title="ZK: ZNodes (avg.)", + description="""Average size of ZNodes in the cluster. + Getting the node count per server, and averaging the node count. 
+ """, + dataSource=ds, targets=[ G.Target( - expr="avg(zookeeper_inmemorydatatree_nodecount{" - + env_label - + '="$env"})', + expr="avg(zookeeper_inmemorydatatree_nodecount{" + by_env + "})", ), ], reduceCalc="last", @@ -82,27 +118,37 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), ), G.Stat( - title="ZK: Sum of number of Alive Connections", - dataSource="${DS_PROMETHEUS}", + title="ZK: Connections used", + description="""Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host. + If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened. + """, + dataSource=ds, targets=[ G.Target( - expr="sum(zookeeper_numaliveconnections{" + env_label + '="$env"})', + expr="zookeeper_numaliveconnections{" + + by_env + + "} / zookeeper_maxclientcnxnsperhost{" + + by_env + + "}", ), ], reduceCalc="last", thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=0.6, color="yellow"), + G.Threshold(index=2, value=0.8, color="red"), ], + format="percentunit", gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), ), G.Stat( title="ZK: Sum of watchers", - dataSource="${DS_PROMETHEUS}", + description="""Sum of client watchers subscribed to changes on the ZNodes. + """, + dataSource=ds, targets=[ G.Target( - expr="sum(zookeeper_inmemorydatatree_watchcount{" - + env_label - + '="$env"})', + expr="sum(zookeeper_inmemorydatatree_watchcount{" + by_env + "})", ), ], reduceCalc="last", @@ -113,10 +159,14 @@ def dashboard( ), G.TimeSeries( title="ZK: Outstanding Requests", - dataSource="${DS_PROMETHEUS}", + description="""Number of requests waiting for processing (queued). 
+ If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked. + It could mean that there is not enough resources to cope with the number of requests. + """, + dataSource=ds, targets=[ G.Target( - expr="zookeeper_outstandingrequests{" + env_label + '="$env"}', + expr="zookeeper_outstandingrequests{" + by_env + "}", legendFormat="{{pod}} ({{server_id}}:{{member_type}})", ), ], @@ -124,9 +174,16 @@ def dashboard( legendCalcs=["max", "last"], legendPlacement="right", gridPos=G.GridPos(h=default_height, w=8, x=stat_width * 4, y=0), + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=10.0, color="red"), + ], ), ] + ## Kafka panels + ### When updating descriptions on these panels, also update descriptions in kafka-cluster.py kafka_panels = [ G.RowPanel( title="Kafka cluster", @@ -134,12 +191,15 @@ def dashboard( ), G.Stat( title="Kafka: Online Brokers", - dataSource="${DS_PROMETHEUS}", + description="""Count of brokers available (online). + This value is referential and should not be used for alerting. + """, + dataSource=ds, targets=[ G.Target( expr="count(kafka_server_replicamanager_leadercount{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -150,12 +210,15 @@ def dashboard( ), G.Stat( title="Kafka: Active Controller", - dataSource="${DS_PROMETHEUS}", + description="""Active Controller broker. + It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting. 
+ """, + dataSource=ds, targets=[ G.Target( expr="kafka_controller_kafkacontroller_activecontrollercount{" - + env_label - + '="$env"} > 0', + + by_env + + "} > 0", legendFormat="{{" + server_label + "}}", ), ], @@ -167,13 +230,15 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=1), ), G.Stat( - title="Kafka: Sum of Partitioenv", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Sum of Partitions", + description="""Sum of Topic partitions across the cluster. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_replicamanager_partitioncount{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -183,13 +248,16 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=1), ), G.Stat( - title="Kafka: Sum of Partitions Under-Replicated (URP)", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Sum of Under-Replicated Partitions (URP)", + description="""Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions. + There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_replicamanager_underreplicatedpartitions{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -200,13 +268,15 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=1), ), G.Stat( - title="Kafka: Sum of Partitioenv Under-MinISR", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Sum of Under-MinISR Partitions", + description="""Number of partitions where the number of replicas offline is higher than the minimum ISR configuration. + This means partitions are not available for Producers with acks=all. 
+ It's recommended alerting when this values is higher than 0. + """, + dataSource=ds, targets=[ G.Target( - expr="sum(kafka_cluster_partition_underminisr{" - + env_label - + '="$env"})', + expr="sum(kafka_cluster_partition_underminisr{" + by_env + "})", ), ], reduceCalc="last", @@ -217,13 +287,17 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=1), ), G.Stat( - title="Kafka: Sum of Partitions Offline", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Sum of Offline Partitions", + description="""Number of partitions where all replicas are offline. + Producers and Consumers are affected by this condition. + It's recommended alerting when this values is higher than 0. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_controller_kafkacontroller_offlinepartitionscount{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -235,6 +309,8 @@ def dashboard( ), ] + ## Schema Registry panels: + ### When updating descriptions on these panels, also update descriptions in schema-registry-cluster.py sr_panels = [ G.RowPanel( title="Schema Registry cluster", @@ -242,12 +318,14 @@ def dashboard( ), G.Stat( title="SR: Online instances", - dataSource="${DS_PROMETHEUS}", + description="""Schema Registry online instances returning metrics. + """, + dataSource=ds, targets=[ G.Target( expr="count(kafka_schema_registry_registered_count{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -259,14 +337,13 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=2), ), G.Stat( - title="SR: Sum of Registered Schemas", - dataSource="${DS_PROMETHEUS}", + title="SR: Registered Schemas (avg.)", + description="""Average number of registered schemas across the cluster. 
+ """, + dataSource=ds, targets=[ G.Target( - expr="avg(kafka_schema_registry_registered_count{" - + env_label - + '="$env"})', - instant=True, + expr="avg(kafka_schema_registry_registered_count{" + by_env + "})", ), ], reduceCalc="last", @@ -276,13 +353,15 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=2), ), G.Stat( - title="SR: Sum of Created Schemas by Type", - dataSource="${DS_PROMETHEUS}", + title="SR: Schemas Created by Type (avg.)", + description="""Average number of schemas created, by type. + """, + dataSource=ds, targets=[ G.Target( expr="avg(kafka_schema_registry_schemas_created{" - + env_label - + '="$env"}) by (schema_type)', + + by_env + + "}) by (schema_type)", legendFormat="{{schema_type}}", ), ], @@ -293,13 +372,15 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=2), ), G.Stat( - title="SR: Sum of Deleted Schemas by Type", - dataSource="${DS_PROMETHEUS}", + title="SR: Schemas Deleted by Type (avg.)", + description="""Average number of schemas deleted, by type. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_schema_registry_schemas_deleted{" - + env_label - + '="$env"}) by (schema_type)', + + by_env + + "}) by (schema_type)", legendFormat="{{schema_type}}", ), ], @@ -311,15 +392,19 @@ def dashboard( ), ] + ## Kafka Connect cluster panels: + ### When updating descriptions on these panels, also update descriptions in kafka-connect-cluster.py connect_inner = [ G.Stat( title="Connect: Online Workers", - dataSource="${DS_PROMETHEUS}", + description="""Kafka Connect online workers returning metrics. 
+ """, + dataSource=ds, targets=[ G.Target( expr="count(kafka_connect_connect_worker_metrics_connector_count{" - + env_label - + '="$env",' + + by_env + + "," + connect_cluster_label + '=~"$connect_cluster"})', ), @@ -332,12 +417,14 @@ def dashboard( ), G.Stat( title="Connect: Sum of Total Tasks", - dataSource="${DS_PROMETHEUS}", + description="""Number of tasks deployed on Kafka Connect cluster. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_total_task_count{" - + env_label - + '="$env",' + + by_env + + "," + connect_cluster_label + '=~"$connect_cluster"})', ), @@ -350,12 +437,15 @@ def dashboard( ), G.Stat( title="Connect: Sum of Running Tasks", - dataSource="${DS_PROMETHEUS}", + description="""Number of Running Tasks on the Kafka Connect cluster. + Ideally, this number should be equal to the total number of tasks deployed. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_running_task_count{" - + env_label - + '="$env",' + + by_env + + "," + connect_cluster_label + '=~"$connect_cluster"})', ), @@ -369,12 +459,15 @@ def dashboard( ), G.Stat( title="Connect: Sum of Paused Tasks", - dataSource="${DS_PROMETHEUS}", + description="""Number of Paused Tasks on the Kafka Connect cluster. + Ideally, this number should be zero, as tasks should be running. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{" - + env_label - + '="$env",' + + by_env + + "," + connect_cluster_label + '=~"$connect_cluster"})', ), @@ -388,31 +481,38 @@ def dashboard( ), G.Stat( title="Connect: Sum of Failed Tasks", - dataSource="${DS_PROMETHEUS}", + description="""Number of Failed Tasks on the Kafka Connect cluster. + Ideally, this number should be zero, as tasks should be running. + It's recommended alerting when this value is higher than 0.
+ """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{" - + env_label - + '="$env",' + + by_env + + "," + connect_cluster_label + '=~"$connect_cluster"})', ), ], reduceCalc="last", thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=0, value=0.0, color="green"), G.Threshold(index=1, value=1.0, color="red"), ], gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=3), ), G.Stat( title="Connect: Time since last rebalance", - dataSource="${DS_PROMETHEUS}", + description="""Informative value. Time since last rebalance. + When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{" - + env_label - + '="$env",' + + by_env + + "," + connect_cluster_label + '=~"$connect_cluster"} >= 0', legendFormat="{{pod}}", @@ -427,7 +527,7 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=3), ), ] - + ### Repeat as there could be multiple connect clusters per environment. connect_panels = [ G.RowPanel( title="Kafka Connect cluster: $connect_cluster", @@ -438,15 +538,19 @@ def dashboard( ), ] + ## ksqlDB cluster panels: + ### When updating descriptions on these panels, also update descriptions in ksqldb-cluster.py ksqldb_inner = [ G.Stat( - title="ksqlDB: Online Servers", - dataSource="${DS_PROMETHEUS}", + title="ksqlDB: Online instances", + description="""ksqlDB online instances returning metrics. 
+ """, + dataSource=ds, targets=[ G.Target( expr="count(ksql_ksql_engine_query_stats_num_active_queries{" - + env_label - + '="$env", ' + + by_env + + "," + ksqldb_cluster_label + '="$ksqldb_cluster"})', ), @@ -459,12 +563,14 @@ def dashboard( ), G.Stat( title="ksqlDB: Sum of Active Queries", - dataSource="${DS_PROMETHEUS}", + description="""Number of active queries deployed in the cluster. + """, + dataSource=ds, targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_num_active_queries{" - + env_label - + '="$env", ' + + by_env + + "," + ksqldb_cluster_label + '="$ksqldb_cluster"})', ), @@ -477,12 +583,15 @@ def dashboard( ), G.Stat( title="ksqlDB: Sum of Running Queries", - dataSource="${DS_PROMETHEUS}", + description="""Number of running queries deployed in the cluster. + Ideally, this number should be equal to the number of active queries as queries should be running. + """, + dataSource=ds, targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_running_queries{" - + env_label - + '="$env", ' + + by_env + + "," + ksqldb_cluster_label + '="$ksqldb_cluster"})', ), @@ -496,12 +605,16 @@ def dashboard( ), G.Stat( title="ksqlDB: Sum of Rebalancing Queries", - dataSource="${DS_PROMETHEUS}", + description="""Number of queries rebalancing in the cluster. + Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute). + It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time. + """, + dataSource=ds, targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_rebalancing_queries{" - + env_label - + '="$env", ' + + by_env + + "," + ksqldb_cluster_label + '="$ksqldb_cluster"})', ), @@ -514,13 +627,17 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 3, y=4), ), G.Stat( - title="Connect: Sum of Failed Queries", - dataSource="${DS_PROMETHEUS}", + title="ksqlDB: Sum of Queries Failed", + description="""Number of queries failed in the cluster.
+ Ideally, this number should be equal zero. + It's recommended to alert if the number of queries failed is higher than 0. + """, + dataSource=ds, targets=[ G.Target( expr="avg(ksql_ksql_engine_query_stats_error_queries{" - + env_label - + '="$env", ' + + by_env + + "," + ksqldb_cluster_label + '="$ksqldb_cluster"})', ), @@ -533,7 +650,7 @@ def dashboard( gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=4), ), ] - + ### Repeat as there could be multiple ksqldb clusters per environment. ksqldb_panels = [ G.RowPanel( title="ksqlDB cluster: $ksqldb_cluster", @@ -544,8 +661,10 @@ def dashboard( ), ] + # group all panels panels = zk_panels + kafka_panels + sr_panels + connect_panels + ksqldb_panels + # build dashboard return G.Dashboard( title="Confluent Platform overview - v2", description="Overview of the main health-check metrics from Confluent Platform components.", @@ -572,12 +691,16 @@ def dashboard( ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") connect_cluster_label = os.environ.get( "CONNECT_CLUSTER_LABEL", "kafka_connect_cluster_id" ) ksqldb_cluster_label = os.environ.get("KSQLDB_CLUSTER_LABEL", "ksqldb_cluster_id") + +# dashboard required by grafanalib dashboard = dashboard( - env_label, server_label, connect_cluster_label, ksqldb_cluster_label + ds, env_label, server_label, connect_cluster_label, ksqldb_cluster_label ) diff --git a/grafana-dashboards/default/confluent-platform.json b/grafana-dashboards/default/confluent-platform.json index 28a286ae..4e89ac2d 100644 --- a/grafana-dashboards/default/confluent-platform.json +++ b/grafana-dashboards/default/confluent-platform.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Quorum Size of Zookeeper ensemble.\n Count 
Zookeeper servers with quorum size metric.\n ", "editable": true, "error": false, "fieldConfig": { @@ -161,8 +161,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", "editable": true, "error": false, "fieldConfig": { @@ -237,15 +237,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: Avg. number of ZNodes", + "title": "ZK: ZNodes (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", "editable": true, "error": false, "fieldConfig": { @@ -258,16 +258,32 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", "value": "null", "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, @@ -305,7 +321,7 @@ "targets": [ { "datasource": null, - "expr": "sum(zookeeper_numaliveconnections{env=\"$env\"})", + "expr": "zookeeper_numaliveconnections{env=\"$env\"} / zookeeper_maxclientcnxnsperhost{env=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -320,15 +336,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: Sum of number of Alive Connections", + "title": "ZK: Connections used", "transformations": 
[], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", "editable": true, "error": false, "fieldConfig": { @@ -410,8 +426,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", "editable": true, "error": false, "fieldConfig": { @@ -448,7 +464,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] }, "unit": "" }, @@ -550,8 +591,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Count of brokers available (online).\n This value is referential and should not be used for alerting.\n ", "editable": true, "error": false, "fieldConfig": { @@ -633,8 +674,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Active Controller broker.\n It should always be 1. 
If the value is different than 1, then it must be alerted for troubleshooting.\n ", "editable": true, "error": false, "fieldConfig": { @@ -716,8 +757,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of Topic partitions across the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -792,15 +833,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitioenv", + "title": "Kafka: Sum of Partitions", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 
1 minute), then it's recommended to alert.\n ", "editable": true, "error": false, "fieldConfig": { @@ -883,15 +924,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Under-Replicated (URP)", + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n It's recommended alerting when this values is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -974,15 +1015,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitioenv Under-MinISR", + "title": "Kafka: Sum of Under-MinISR Partitions", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n It's recommended alerting when this values is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1065,7 +1106,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Offline", + "title": "Kafka: Sum of Offline Partitions", "transformations": [], "transparent": false, "type": "stat" @@ -1113,8 +1154,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Schema Registry online instances returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1212,8 +1253,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": 
"Average number of registered schemas across the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1276,7 +1317,7 @@ "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", @@ -1288,15 +1329,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Registered Schemas", + "title": "SR: Registered Schemas (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average number of schemas created, by type.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1371,15 +1412,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Created Schemas by Type", + "title": "SR: Schemas Created by Type (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average number of schemas deleted, by type.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1454,7 +1495,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Deleted Schemas by Type", + "title": "SR: Schemas Deleted by Type (avg.)", "transformations": [], "transparent": false, "type": "stat" @@ -1491,8 +1532,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Kafka Connect online workers returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1574,8 +1615,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ 
-1657,8 +1698,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1748,8 +1789,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1839,8 +1880,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Failed Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1853,7 +1894,7 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", @@ -1930,8 +1971,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Informative value.
Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2055,8 +2096,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "ksqlDB online instances returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2116,7 +2157,7 @@ "targets": [ { "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2131,15 +2172,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "ksqlDB: Online Servers", + "title": "ksqlDB: Online instances", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of active queries deployed in the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2199,7 +2240,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2221,8 +2262,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2290,7 
+2331,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2312,8 +2353,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2381,7 +2422,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2403,8 +2444,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2472,7 +2513,7 @@ "targets": [ { "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -2487,7 +2528,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Connect: Sum of Failed Queries", + "title":
"ksqlDB: Sum of Queries Failed", "transformations": [], "transparent": false, "type": "stat" diff --git a/grafana-dashboards/default/kafka-cluster.json b/grafana-dashboards/default/kafka-cluster.json index 25b5f484..1c7f6dbe 100644 --- a/grafana-dashboards/default/kafka-cluster.json +++ b/grafana-dashboards/default/kafka-cluster.json @@ -55,15 +55,15 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Overview", + "title": "Cluster Overview", "transformations": [], "transparent": false, "type": "row" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Count of brokers available (online).\n ", "editable": true, "error": false, "fieldConfig": { @@ -145,8 +145,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", "editable": true, "error": false, "fieldConfig": { @@ -228,8 +228,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "\n Number of partitions where the preferred replica is not the leader.\n Usually, this number is 0.\n Restarting nodes could cause this values to change, but when reassigning happens the value stabilize.\n ", "editable": true, "error": false, "fieldConfig": { @@ -304,15 +304,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Replica Imbalance", + "title": "Kafka: Sum of Preferred Replica Imbalance", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of topics in the cluster.", "editable": true, "error": false, "fieldConfig": { @@ -394,8 +394,8 @@ }, { "cacheTimeout": null, -
"datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of requests per second rated over a 5 min. period.\n Gives an idea of the processing load in the cluster.", "editable": true, "error": false, "fieldConfig": { @@ -477,8 +477,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of log sizes per broker.\n This must be compared with the total storage space available in the brokers.", "editable": true, "error": false, "fieldConfig": { @@ -553,15 +553,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Logs Size", + "title": "Kafka: Log Size", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of Topic partitions across the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -643,8 +643,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 
1 minute), then it's recommended to alert.\n ", "editable": true, "error": false, "fieldConfig": { @@ -727,15 +727,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Under-Replicated (URP)", + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n We recommend alerting when this values is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -818,15 +818,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Under-MinISR", + "title": "Kafka: Sum of Under-MinISR Partitions", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n We recommend alerting when this values is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -909,15 +909,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Kafka: Sum of Partitions Offline", + "title": "Kafka: Sum of Offline Partitions", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of bytes in per second rated over a 5 min. 
period.\n Gives an idea of the incoming throughput handle by the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -999,8 +999,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of bytes out per second rated over a 5 min. period.\n Gives an idea of the outgoing throughput handle by the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1116,15 +1116,15 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "System", + "title": "System resources", "transformations": [], "transparent": false, "type": "row" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", "editable": true, "error": false, "fieldConfig": { @@ -1223,8 +1223,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", "editable": true, "error": false, "fieldConfig": { @@ -1323,8 +1323,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", "editable": true, "error": false, "fieldConfig": { @@ -1425,7 +1425,7 @@ "cacheTimeout": null, "collapsed": true, "datasource": null, - "description": "Bytes in/out per second", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -1453,8 +1453,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of messages into topics per second, aggregated by sum without topic.", "editable": true, "error": false, "fieldConfig": { @@ -1553,8 +1553,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of bytes into topics per second, aggregated by sum without topic.", "editable": true, "error": false, "fieldConfig": { @@ -1653,8 +1653,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of bytes out of topics per second, aggregated by sum without topic.", "editable": true, "error": false, "fieldConfig": { @@ -1767,7 +1767,7 @@ "cacheTimeout": null, "collapsed": true, "datasource": null, - "description": "Internal thread pools usage", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -1795,8 +1795,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Percent of time the network thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", "editable": true, "error": false, "fieldConfig": { @@ -1895,8 +1895,8 @@ }, { 
"cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Percent of time the IO thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", "editable": true, "error": false, "fieldConfig": { @@ -2000,7 +2000,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Thread Utilization", + "title": "Thread utilization", "transformations": [], "transparent": false, "type": "row" @@ -2009,7 +2009,7 @@ "cacheTimeout": null, "collapsed": true, "datasource": null, - "description": "Sum of req/sec rates", + "description": null, "editable": true, "error": false, "fieldConfig": { @@ -2037,8 +2037,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Requests per second rated over a 5 minutes period.\n Includes API call and version.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2140,8 +2140,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Request Errors per second rated over a 5 minutes period.\n Includes API call and version.\n ", "editable": true, "error": false, "fieldConfig": { @@ -2285,8 +2285,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of connections count across cluster by brokers", "editable": true, "error": false, "fieldConfig": { @@ -2385,8 +2385,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of rate of connections created across cluster by brokers", "editable": true, "error": false, "fieldConfig": { @@ -2485,8 +2485,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + 
"datasource": "Prometheus", + "description": "Sum of rate of connections closed across cluster by brokers", "editable": true, "error": false, "fieldConfig": { @@ -2585,8 +2585,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of connections count across cluster by listeners", "editable": true, "error": false, "fieldConfig": { @@ -2685,8 +2685,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of rate of connections created across cluster by listener", "editable": true, "error": false, "fieldConfig": { @@ -2785,8 +2785,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of rate of connections closed across cluster by listener", "editable": true, "error": false, "fieldConfig": { @@ -2927,8 +2927,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Rate of ISR shrinks per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3027,8 +3027,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Rate of ISR expands per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3169,8 +3169,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3269,8 +3269,8 @@ 
}, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3369,8 +3369,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3469,8 +3469,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3569,8 +3569,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3711,8 +3711,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3811,8 +3811,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -3911,8 +3911,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time 
expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4011,8 +4011,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4111,8 +4111,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4253,8 +4253,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4353,8 +4353,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4453,8 +4453,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4553,8 +4553,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ 
-4653,8 +4653,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", "editable": true, "error": false, "fieldConfig": { @@ -4795,8 +4795,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of groups managed by Broker", "editable": true, "error": false, "fieldConfig": { @@ -4895,8 +4895,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of stable groups managed by Broker", "editable": true, "error": false, "fieldConfig": { @@ -5046,7 +5046,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Number of Groups per Broker", + "title": "Number of Groups per Broker per Status", "transformations": [], "transparent": false, "type": "timeseries" @@ -5095,8 +5095,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of produce message conversions per second.\n This value increases when the broker receives produce messages from clients using older versions.\n ", "editable": true, "error": false, "fieldConfig": { @@ -5195,8 +5195,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of fetch message conversions per second.\n This value increases when the broker receives fetch messages from clients using older versions.\n ", "editable": true, "error": false, "fieldConfig": { @@ -5295,8 +5295,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of connections aggregated by client version and name.\n ", "editable": true, "error": 
false, "fieldConfig": { diff --git a/grafana-dashboards/default/kafka-connect-cluster.json b/grafana-dashboards/default/kafka-connect-cluster.json index 5b9b8544..d2e12193 100644 --- a/grafana-dashboards/default/kafka-connect-cluster.json +++ b/grafana-dashboards/default/kafka-connect-cluster.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Kafka Connect online workers returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -145,8 +145,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -228,8 +228,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", "editable": true, "error": false, "fieldConfig": { @@ -319,8 +319,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", "editable": true, "error": false, "fieldConfig": { @@ -410,8 +410,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -424,7 +424,7 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, 
"op": "gt", @@ -501,8 +501,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Informative value. Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", "editable": true, "error": false, "fieldConfig": { @@ -588,7 +588,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -808,7 +808,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -962,7 +962,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1062,7 +1062,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1203,8 +1203,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", "editable": true, "error": false, "fieldConfig": { @@ -1303,8 +1303,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", "editable": true, "error": false, "fieldConfig": { @@ -1403,8 +1403,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", "editable": true, "error": false, "fieldConfig": { @@ -1533,7 +1533,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1633,7 +1633,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1733,7 +1733,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1833,7 +1833,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1975,7 +1975,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2075,7 +2075,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2175,7 +2175,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2275,7 +2275,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2375,7 +2375,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2475,7 +2475,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": 
null, "editable": true, "error": false, @@ -2617,7 +2617,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2717,7 +2717,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2817,7 +2817,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2917,7 +2917,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3059,7 +3059,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3159,7 +3159,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3259,7 +3259,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3401,7 +3401,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3501,7 +3501,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3601,7 +3601,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3701,7 +3701,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3801,7 +3801,7 @@ }, { "cacheTimeout": null, - 
"datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3901,7 +3901,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/default/kafka-consumer.json b/grafana-dashboards/default/kafka-consumer.json index d0305287..3aef1c22 100644 --- a/grafana-dashboards/default/kafka-consumer.json +++ b/grafana-dashboards/default/kafka-consumer.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -145,7 +145,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -228,7 +228,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -327,7 +327,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -418,7 +418,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -531,7 +531,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -631,7 +631,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -731,7 +731,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -831,7 +831,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", 
"description": null, "editable": true, "error": false, @@ -931,7 +931,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1045,7 +1045,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1159,7 +1159,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1315,7 +1315,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1415,7 +1415,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1515,7 +1515,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1615,7 +1615,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1729,7 +1729,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1843,7 +1843,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1957,7 +1957,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2057,7 +2057,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2157,7 +2157,7 @@ }, { "cacheTimeout": null, - "datasource": 
"${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2257,7 +2257,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2371,7 +2371,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2485,7 +2485,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2627,7 +2627,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2727,7 +2727,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2827,7 +2827,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2927,7 +2927,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3027,7 +3027,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3127,7 +3127,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3227,7 +3227,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3327,7 +3327,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3469,7 
+3469,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3569,7 +3569,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3669,7 +3669,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3783,7 +3783,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3883,7 +3883,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -4025,7 +4025,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -4125,7 +4125,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -4225,7 +4225,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -4339,7 +4339,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/default/kafka-producer.json b/grafana-dashboards/default/kafka-producer.json index 66acd77d..17359359 100644 --- a/grafana-dashboards/default/kafka-producer.json +++ b/grafana-dashboards/default/kafka-producer.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -145,7 +145,7 @@ }, { "cacheTimeout": null, - 
"datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -236,7 +236,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -335,7 +335,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -448,7 +448,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -548,7 +548,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -648,7 +648,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -748,7 +748,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -848,7 +848,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -948,7 +948,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1048,7 +1048,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1148,7 +1148,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1248,7 +1248,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1348,7 
+1348,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1462,7 +1462,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1576,7 +1576,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1690,7 +1690,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1804,7 +1804,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1904,7 +1904,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2046,7 +2046,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2146,7 +2146,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2246,7 +2246,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2346,7 +2346,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2446,7 +2446,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2546,7 +2546,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": 
null, "editable": true, "error": false, @@ -2646,7 +2646,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2746,7 +2746,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2888,7 +2888,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2988,7 +2988,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3088,7 +3088,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3202,7 +3202,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3302,7 +3302,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3444,7 +3444,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3544,7 +3544,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3644,7 +3644,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3744,7 +3744,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3844,7 +3844,7 @@ }, { "cacheTimeout": null, - "datasource": 
"${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/default/kafka-quotas.json b/grafana-dashboards/default/kafka-quotas.json index b22c758b..189bcad9 100644 --- a/grafana-dashboards/default/kafka-quotas.json +++ b/grafana-dashboards/default/kafka-quotas.json @@ -21,7 +21,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -121,7 +121,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -221,7 +221,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -321,7 +321,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -421,7 +421,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -521,7 +521,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/default/kafka-topics.json b/grafana-dashboards/default/kafka-topics.json index 8c3b1527..7354ae5c 100644 --- a/grafana-dashboards/default/kafka-topics.json +++ b/grafana-dashboards/default/kafka-topics.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -164,7 +164,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -266,7 +266,7 @@ }, { "cacheTimeout": null, - "datasource": 
"${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -368,7 +368,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -470,7 +470,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -572,7 +572,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -708,7 +708,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -841,7 +841,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/default/ksqldb-cluster.json b/grafana-dashboards/default/ksqldb-cluster.json index aabb7b56..0452da9c 100644 --- a/grafana-dashboards/default/ksqldb-cluster.json +++ b/grafana-dashboards/default/ksqldb-cluster.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "ksqlDB online instances returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -145,8 +145,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of active queries deployed in the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -228,8 +228,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active 
queries as queries should be running.\n ", "editable": true, "error": false, "fieldConfig": { @@ -319,8 +319,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", "editable": true, "error": false, "fieldConfig": { @@ -410,8 +410,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", "editable": true, "error": false, "fieldConfig": { @@ -494,14 +494,14 @@ ], "timeFrom": null, "timeShift": null, - "title": "Connect: Sum of Failed Queries", + "title": "Connect: Sum of Queries Failed", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -601,7 +601,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -701,7 +701,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -842,8 +842,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", "editable": true, "error": false, "fieldConfig": { @@ -942,8 +942,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", "editable": true, "error": false, "fieldConfig": { @@ -1042,8 +1042,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", "editable": true, "error": false, "fieldConfig": { @@ -1172,7 +1172,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1272,7 +1272,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1372,7 +1372,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1472,7 +1472,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1572,7 +1572,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1672,7 +1672,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1772,7 +1772,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1872,7 +1872,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, 
"editable": true, "error": false, @@ -2014,7 +2014,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2114,7 +2114,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2214,7 +2214,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2314,7 +2314,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2414,7 +2414,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2514,7 +2514,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2614,7 +2614,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2714,7 +2714,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2814,7 +2814,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -2914,7 +2914,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3014,7 +3014,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3114,7 +3114,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + 
"datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3214,7 +3214,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3314,7 +3314,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -3414,7 +3414,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/default/schema-registry-cluster.json b/grafana-dashboards/default/schema-registry-cluster.json index b49fe69d..caf43878 100644 --- a/grafana-dashboards/default/schema-registry-cluster.json +++ b/grafana-dashboards/default/schema-registry-cluster.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Schema Registry online instances returning metrics.\n ", "editable": true, "error": false, "fieldConfig": { @@ -161,8 +161,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average number of registered schemas across the cluster.\n ", "editable": true, "error": false, "fieldConfig": { @@ -225,7 +225,7 @@ "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", @@ -237,15 +237,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Registered Schemas", + "title": "SR: Registered Schemas (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average number of schemas 
created, by type.\n ", "editable": true, "error": false, "fieldConfig": { @@ -320,14 +320,14 @@ ], "timeFrom": null, "timeShift": null, - "title": "SR: Sum of Created Schemas by Type", + "title": "SR: Created Schemas by Type (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -410,7 +410,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -534,7 +534,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -634,7 +634,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -734,7 +734,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/default/zookeeper-cluster.json b/grafana-dashboards/default/zookeeper-cluster.json index b1982b8d..465129dc 100644 --- a/grafana-dashboards/default/zookeeper-cluster.json +++ b/grafana-dashboards/default/zookeeper-cluster.json @@ -62,8 +62,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", "editable": true, "error": false, "fieldConfig": { @@ -161,8 +161,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", "editable": true, "error": false, "fieldConfig": { @@ -237,15 
+237,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: Avg. number of ZNodes", + "title": "ZK: ZNodes (avg.)", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", "editable": true, "error": false, "fieldConfig": { @@ -258,16 +258,32 @@ "mode": "absolute", "steps": [ { - "color": "blue", + "color": "green", "index": 0, "line": true, "op": "gt", "value": "null", "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, @@ -305,7 +321,7 @@ "targets": [ { "datasource": null, - "expr": "sum(zookeeper_numaliveconnections{env=\"$env\"})", + "expr": "zookeeper_numaliveconnections{env=\"$env\"} / zookeeper_maxclientcnxnsperhost{env=\"$env\"}", "format": "time_series", "hide": false, "instant": false, @@ -320,15 +336,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "ZK: Sum of number of Alive Connections", + "title": "ZK: Connections used", "transformations": [], "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", "editable": true, "error": false, "fieldConfig": { @@ -410,8 +426,8 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, + "datasource": "Prometheus", 
+ "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", "editable": true, "error": false, "fieldConfig": { @@ -448,7 +464,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [] + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" + } + ] }, "unit": "" }, @@ -550,7 +591,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -650,7 +691,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -750,7 +791,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -880,7 +921,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -980,7 +1021,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1080,7 +1121,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1222,7 +1263,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1322,7 +1363,7 @@ }, { 
"cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1424,7 +1465,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1526,7 +1567,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, @@ -1628,7 +1669,7 @@ }, { "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", + "datasource": "Prometheus", "description": null, "editable": true, "error": false, diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index e94c011f..0ff4857d 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -2,26 +2,62 @@ import grafanalib.core as G -def dashboard(env_label="namespace", server_label="pod"): +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka cluster dashboard + It includes: + - Cluster overview + - System resources + - Throughput + - Thread utilization + - Request rates + - Connections + - In-Sync Replicas + - Request latency: Producer + - Request latency: Consumer Fetch + - Request latency: Follower Fetch + - Group Coordinator + - Message Conversion + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. 
+ + Invariants: + - Max width: 24 + """ + + # Default sizes default_height = 5 stat_width = 4 ts_width = 8 + # Queries + by_env = env_label + '="$env"' + by_env_and_server = env_label + '="$env",' + server_label + '=~"$broker"' + + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="broker", label="Broker", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_server_replicamanager_leadercount{" - + env_label - + '="$env"}, ' + + by_env + + "}, " + server_label + ")", multi=True, @@ -30,27 +66,33 @@ def dashboard(env_label="namespace", server_label="pod"): G.Template( name="quantile", label="Quantile", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_network_requestmetrics_requestqueuetimems{" - + env_label - + '="$env"}, quantile)', + + by_env + + "}, quantile)", ), ] ) - healthcheck_panels = [ + # Panel groups + ## Cluster overview: + ### When updating descriptions on these panels, also update descriptions in confluent-platform.py + overview_panels = [ G.RowPanel( - title="Overview", + title="Cluster Overview", gridPos=G.GridPos(h=1, w=24, x=0, y=0), ), + # First group of stats G.Stat( title="Kafka: Online Brokers", - dataSource="${DS_PROMETHEUS}", + description="""Count of brokers available (online). + """, + dataSource=ds, targets=[ G.Target( expr="count(kafka_server_replicamanager_leadercount{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -61,12 +103,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Kafka: Active Controller", - dataSource="${DS_PROMETHEUS}", + description="""Active Controller broker. + It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting. 
+ """, + dataSource=ds, targets=[ G.Target( expr="kafka_controller_kafkacontroller_activecontrollercount{" - + env_label - + '="$env"} > 0', + + by_env + + "} > 0", legendFormat="{{" + server_label + "}}", ), ], @@ -78,13 +123,18 @@ def dashboard(env_label="namespace", server_label="pod"): gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), ), G.Stat( - title="Kafka: Sum of Replica Imbalance", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Sum of Preferred Replica Imbalance", + description=""" + Number of partitions where the preferred replica is not the leader. + Usually, this number is 0. + Restarting nodes could cause this values to change, but when reassigning happens the value stabilize. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -95,12 +145,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Kafka: Sum of Topics", - dataSource="${DS_PROMETHEUS}", + description="Number of topics in the cluster.", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_controller_kafkacontroller_globaltopiccount{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -111,14 +162,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Kafka: Rate of Requests/Sec", - dataSource="${DS_PROMETHEUS}", + description="""Sum of requests per second rated over a 5 min. period. 
+ Gives an idea of the processing load in the cluster.""", + dataSource=ds, targets=[ G.Target( expr="sum(rate(kafka_network_requestmetrics_requestspersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m]))', + + by_env_and_server + + "}[5m]))", ), ], reduceCalc="last", @@ -129,15 +180,15 @@ def dashboard(env_label="namespace", server_label="pod"): gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 4, y=0), ), G.Stat( - title="Kafka: Logs Size", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Log Size", + description="""Sum of log sizes per broker. + This must be compared with the total storage space available in the brokers.""", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_log_log_size{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}) by (' + + by_env_and_server + + "}) by (" + server_label + ")", legendFormat="{{" + server_label + "}}", @@ -151,16 +202,17 @@ def dashboard(env_label="namespace", server_label="pod"): format="bytes", gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 5, y=0), ), + # Second group of stats G.Stat( title="Kafka: Sum of Partitions", - dataSource="${DS_PROMETHEUS}", + description="""Sum of Topic partitions across the cluster. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_replicamanager_partitioncount{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", ), ], reduceCalc="last", @@ -170,15 +222,16 @@ def dashboard(env_label="namespace", server_label="pod"): gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=1), ), G.Stat( - title="Kafka: Sum of Partitions Under-Replicated (URP)", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Sum of Under-Replicated Partitions (URP)", + description="""Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions. 
+ There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_replicamanager_underreplicatedpartitions{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", ), ], reduceCalc="last", @@ -189,15 +242,17 @@ def dashboard(env_label="namespace", server_label="pod"): gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=1), ), G.Stat( - title="Kafka: Sum of Partitions Under-MinISR", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Sum of Under-MinISR Partitions", + description="""Number of partitions where the number of replicas offline is higher than the minimum ISR configuration. + This means partitions are not available for Producers with acks=all. + We recommend alerting when this values is higher than 0. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_cluster_partition_underminisr{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", ), ], reduceCalc="last", @@ -208,15 +263,17 @@ def dashboard(env_label="namespace", server_label="pod"): gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=1), ), G.Stat( - title="Kafka: Sum of Partitions Offline", - dataSource="${DS_PROMETHEUS}", + title="Kafka: Sum of Offline Partitions", + description="""Number of partitions where all replicas are offline. + Producers and Consumers are affected by this condition. + We recommend alerting when this values is higher than 0. 
+ """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_controller_kafkacontroller_offlinepartitionscount{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", ), ], reduceCalc="last", @@ -228,14 +285,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Kafka: Bytes In/Sec", - dataSource="${DS_PROMETHEUS}", + description="""Sum of bytes in per second rated over a 5 min. period. + Gives an idea of the incoming throughput handle by the cluster. + """, + dataSource=ds, targets=[ G.Target( expr="sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m]))', + + by_env_and_server + + "}[5m]))", ), ], reduceCalc="last", @@ -247,14 +305,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Kafka: Bytes Out/Sec", - dataSource="${DS_PROMETHEUS}", + description="""Sum of bytes out per second rated over a 5 min. period. + Gives an idea of the outgoing throughput handle by the cluster. + """, + dataSource=ds, targets=[ G.Target( expr="sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m]))', + + by_env_and_server + + "}[5m]))", ), ], reduceCalc="last", @@ -266,23 +325,25 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## System resources: + ### When updating descriptions on these panels, also update descriptions in other cluster dashboards system_base = 2 - system_panels = [ G.RowPanel( - title="System", + title="System resources", gridPos=G.GridPos(h=1, w=24, x=0, y=system_base), ), G.TimeSeries( title="CPU usage", - dataSource="${DS_PROMETHEUS}", + description="""Rate of CPU seconds used by the Java process. + 100% usage represents one core. 
+ If there are multiple cores, the total capacity should be 100% * number_cores.""", + dataSource=ds, targets=[ G.Target( expr="irate(process_cpu_seconds_total{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m])', + + by_env_and_server + + "}[5m])", legendFormat="{{" + server_label + "}}", ), ], @@ -295,14 +356,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Memory usage", - dataSource="${DS_PROMETHEUS}", + description="""Sum of JVM memory used, without including areas (e.g. heap size).""", + dataSource=ds, targets=[ G.Target( expr="sum without(area)(jvm_memory_bytes_used{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", legendFormat="{{" + server_label + "}}", ), ], @@ -315,14 +375,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="GC collection", - dataSource="${DS_PROMETHEUS}", + description="""Sum of seconds used by Garbage Collection.""", + dataSource=ds, targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m]))', + + by_env_and_server + + "}[5m]))", legendFormat="{{" + server_label + "}}", ), ], @@ -335,18 +394,18 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Throughput: throughtput_base = system_base + 1 throughput_inner = [ G.TimeSeries( title="Messages In/Sec", - dataSource="${DS_PROMETHEUS}", + description="""Number of messages into topics per second, aggregated by sum without topic.""", + dataSource=ds, targets=[ G.Target( expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m]))', + + by_env_and_server + + "}[5m]))", legendFormat="{{" + server_label + "}}", ), ], @@ -359,14 +418,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Bytes In/Sec", - 
dataSource="${DS_PROMETHEUS}", + description="""Number of bytes into topics per second, aggregated by sum without topic.""", + dataSource=ds, targets=[ G.Target( expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m]))', + + by_env_and_server + + "}[5m]))", legendFormat="{{" + server_label + "}}", ), ], @@ -379,14 +437,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Bytes Out/Sec", - dataSource="${DS_PROMETHEUS}", + description="""Number of bytes out of topics per second, aggregated by sum without topic.""", + dataSource=ds, targets=[ G.Target( expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m]))', + + by_env_and_server + + "}[5m]))", legendFormat="{{" + server_label + "}}", ), ], @@ -401,25 +458,26 @@ def dashboard(env_label="namespace", server_label="pod"): throughput_panels = [ G.RowPanel( title="Throughput", - description="Bytes in/out per second", gridPos=G.GridPos(h=1, w=24, x=0, y=throughtput_base), collapsed=True, panels=throughput_inner, ), ] + ## Thread utilization: thread_base = throughtput_base + 1 thread_inner = [ G.TimeSeries( title="Network processor usage", - dataSource="${DS_PROMETHEUS}", + description="""Percent of time the network thread pool is used. 
+ It should be below 60% or the capacity of threads should be tuned or + the cluster scaled to cope with the load.""", + dataSource=ds, targets=[ G.Target( expr="1-kafka_network_socketserver_networkprocessoravgidlepercent{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}', + + by_env_and_server + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -432,14 +490,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request processor (IO) usage", - dataSource="${DS_PROMETHEUS}", + description="""Percent of time the IO thread pool is used. + It should be below 60% or the capacity of threads should be tuned or + the cluster scaled to cope with the load.""", + dataSource=ds, targets=[ G.Target( expr="1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}', + + by_env_and_server + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -453,26 +512,33 @@ def dashboard(env_label="namespace", server_label="pod"): ] thread_panels = [ G.RowPanel( - title="Thread Utilization", - description="Internal thread pools usage", + title="Thread utilization", gridPos=G.GridPos(h=1, w=24, x=0, y=thread_base), collapsed=True, panels=thread_inner, ), ] + ## Request rates: request_base = thread_base + 1 + ### It has the special case of aggregating across the cluster. + ### As the number of labels is unknown and could be extended depending on the platform. + ### At the moment includes known labels: instance, pod, and stateful_kubernetes_io_pod_name + known_labels = "pod,instance,statefulset_kubernetes_io_pod_name" request_inner = [ G.TimeSeries( title="Requests rates", - dataSource="${DS_PROMETHEUS}", + description="""Requests per second rated over a 5 minutes period. + Includes API call and version. 
+ """, + dataSource=ds, targets=[ G.Target( - expr="sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m]))', + expr="sum without(" + + known_labels + + ")(rate(kafka_network_requestmetrics_requestspersec{" + + by_env_and_server + + "}[5m]))", legendFormat="{{request}}(v{{version}})", ), ], @@ -486,14 +552,17 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Error rates", - dataSource="${DS_PROMETHEUS}", + description="""Request Errors per second rated over a 5 minutes period. + Includes API call and version. + """, + dataSource=ds, targets=[ G.Target( - expr="sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",error!="NONE"}[5m]))', + expr="sum without(" + + known_labels + + ")(rate(kafka_network_requestmetrics_errorspersec{" + + by_env_and_server + + ',error!="NONE"}[5m]))', legendFormat="{{error}}@{{request}}", ), ], @@ -509,25 +578,24 @@ def dashboard(env_label="namespace", server_label="pod"): request_panels = [ G.RowPanel( title="Request rates", - description="Sum of req/sec rates", gridPos=G.GridPos(h=1, w=24, x=0, y=request_base), collapsed=True, panels=request_inner, ), ] + ## Connections: connection_base = request_base + 1 connection_inner = [ G.TimeSeries( title="Sum of Connections alive per Broker", - dataSource="${DS_PROMETHEUS}", + description="Sum of connections count across cluster by brokers", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_count{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}) by (' + + by_env_and_server + + "}) by (" + server_label + ")", legendFormat="{{" + server_label + "}}", @@ -541,14 +609,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sum of 
Connections creation rate per Broker", - dataSource="${DS_PROMETHEUS}", + description="Sum of rate of connections created across cluster by brokers", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_creation_rate{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}) by (' + + by_env_and_server + + "}) by (" + server_label + ")", legendFormat="{{" + server_label + "}}", @@ -562,14 +629,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sum of Connections close rate per Broker", - dataSource="${DS_PROMETHEUS}", + description="Sum of rate of connections closed across cluster by brokers", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_close_rate{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}) by (' + + by_env_and_server + + "}) by (" + server_label + ")", legendFormat="{{" + server_label + "}}", @@ -584,14 +650,13 @@ def dashboard(env_label="namespace", server_label="pod"): # By Listener G.TimeSeries( title="Sum of Connections alive per Listener", - dataSource="${DS_PROMETHEUS}", + description="Sum of connections count across cluster by listeners", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_count{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}) by (listener)', + + by_env_and_server + + "}) by (listener)", legendFormat="{{listener}}", ), ], @@ -603,14 +668,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sum of Connections creation rate per Listener", - dataSource="${DS_PROMETHEUS}", + description="Sum of rate of connections created across cluster by listener", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_creation_rate{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}) by (listener)', + + by_env_and_server + + "}) by (listener)", 
legendFormat="{{listener}}", ), ], @@ -622,14 +686,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sum of Connections close rate per Listener", - dataSource="${DS_PROMETHEUS}", + description="Sum of rate of connections closed across cluster by listener", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_close_rate{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}) by (listener)', + + by_env_and_server + + "}) by (listener)", legendFormat="{{listener}}", ), ], @@ -649,18 +712,20 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## In-Sync Replicas: isr_base = connection_base + 2 isr_inner = [ G.TimeSeries( title="Rate of ISR Shrinks/sec", - dataSource="${DS_PROMETHEUS}", + description="""Rate of ISR shrinks per second. + If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly. + """, + dataSource=ds, targets=[ G.Target( expr="rate(kafka_server_replicamanager_isrshrinkspersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m])', + + by_env_and_server + + "}[5m])", legendFormat="{{" + server_label + "}}", ), ], @@ -672,14 +737,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Rate of ISR Expands/sec", - dataSource="${DS_PROMETHEUS}", + description="""Rate of ISR expands per second. + If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly. + """, + dataSource=ds, targets=[ G.Target( expr="rate(kafka_server_replicamanager_isrexpandspersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}[5m])', + + by_env_and_server + + "}[5m])", legendFormat="{{" + server_label + "}}", ), ], @@ -699,18 +765,21 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Request latency for Produce: + ### When changing these panels, also modify Consumer Fetch and Follower Fetch. 
producer_base = isr_base + 1 producer_inner = [ G.TimeSeries( title="Produce: Request Queue Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend on the request queue. + Moved from network socket to request queue by Network threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_requestqueuetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Produce"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -723,14 +792,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Produce: Local Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend doing local IO. + Moved from request queue to storage device operations by IO threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_localtimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Produce"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -743,14 +813,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Produce: Remote Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend waiting for coordination with other brokers/internal condition. + At purgatory. 
+ """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_remotetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Produce"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -763,14 +834,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Produce: Response Queue Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend waiting in response queue. + Moved from purgatory to response queue by IO threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_responsequeuetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Produce"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -783,14 +855,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Produce: Response Send Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend delivering response. + Moved from response queue to client by Networkc threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_responsesendtimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Produce"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -811,18 +884,21 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Request latency for Consumer Fetch: + ### When changing these panels, also modify Produce and Follower Fetch. consumer_base = producer_base + 2 consumer_inner = [ G.TimeSeries( title="Fetch: Request Queue Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend on the request queue. 
+ Moved from network socket to request queue by Network threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_requestqueuetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -835,14 +911,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch: Local Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend doing local IO. + Moved from request queue to storage device operations by IO threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_localtimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -855,14 +932,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch: Remote Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend waiting for coordination with other brokers/internal condition. + At purgatory. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_remotetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -875,14 +953,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch: Response Queue Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend waiting in response queue. + Moved from purgatory to response queue by IO threads. 
+ """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_responsequeuetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -895,14 +974,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch: Response Send Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend delivering response. + Moved from response queue to client by Networkc threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_responsesendtimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="Fetch"}', + + by_env_and_server + + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -923,18 +1003,21 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Request latency for Follower Fetch: + ### When changing these panels, also modify Produce and Consumer Fetch. replication_base = consumer_base + 2 replication_inner = [ G.TimeSeries( title="Fetch: Request Queue Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend on the request queue. + Moved from network socket to request queue by Network threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_requestqueuetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + + by_env_and_server + + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -947,14 +1030,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch: Local Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend doing local IO. 
+ Moved from request queue to storage device operations by IO threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_localtimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + + by_env_and_server + + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -967,14 +1051,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch: Remote Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend waiting for coordination with other brokers/internal condition. + At purgatory. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_remotetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + + by_env_and_server + + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -987,14 +1072,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch: Response Queue Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend waiting in response queue. + Moved from purgatory to response queue by IO threads. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_responsequeuetimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + + by_env_and_server + + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -1007,14 +1093,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch: Response Send Time", - dataSource="${DS_PROMETHEUS}", + description="""Time expend delivering response. + Moved from response queue to client by Networkc threads. 
+ """, + dataSource=ds, targets=[ G.Target( expr="kafka_network_requestmetrics_responsesendtimems{" - + env_label - + '="$env",' - + server_label - + '=~"$broker",quantile=~"$quantile",request="FetchFollower"}', + + by_env_and_server + + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), ], @@ -1035,18 +1122,18 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Group Coordination: group_base = replication_base + 2 group_inner = [ G.TimeSeries( title="Number of Groups per Broker", - dataSource="${DS_PROMETHEUS}", + description="Number of groups managed by Broker", + dataSource=ds, targets=[ G.Target( expr="kafka_coordinator_group_groupmetadatamanager_numgroups{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}', + + by_env_and_server + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -1057,47 +1144,38 @@ def dashboard(env_label="namespace", server_label="pod"): ), ), G.TimeSeries( - title="Number of Groups per Broker", - dataSource="${DS_PROMETHEUS}", + title="Number of Groups per Broker per Status", + description="Number of stable groups managed by Broker", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", legendFormat="stable", ), G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", legendFormat="preparing_rebalance", ), G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", legendFormat="dead", ), G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{" - + env_label - + '="$env",' - + server_label - + 
'=~"$broker"})', + + by_env_and_server + + "})", legendFormat="completing_rebalance", ), G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", legendFormat="empty", ), ], @@ -1118,18 +1196,20 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Conversion: conversion_base = group_base + 1 conversion_inner = [ G.TimeSeries( title="Sum of Produce conversion rate per sec", - dataSource="${DS_PROMETHEUS}", + description="""Sum of produce message conversions per second. + This value increases when the broker receives produce messages from clients using older versions. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", legendFormat="{{" + server_label + "}}", ), ], @@ -1142,14 +1222,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sum of Fetch conversion rate per sec", - dataSource="${DS_PROMETHEUS}", + description="""Sum of fetch message conversions per second. + This value increases when the broker receives fetch messages from clients using older versions. + """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"})', + + by_env_and_server + + "})", legendFormat="{{" + server_label + "}}", ), ], @@ -1162,14 +1243,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sum of Connections per version", - dataSource="${DS_PROMETHEUS}", + description="""Sum of connections aggregated by client version and name. 
+ """, + dataSource=ds, targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connections{" - + env_label - + '="$env",' - + server_label - + '=~"$broker"}) by (client_software_name,client_software_version)', + + by_env_and_server + + "}) by (client_software_name,client_software_version)", legendFormat="{{client_software_name}} (v{{client_software_version}})", ), ], @@ -1189,8 +1270,9 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + # group all panels panels = ( - healthcheck_panels + overview_panels + system_panels + throughput_panels + thread_panels @@ -1204,6 +1286,7 @@ def dashboard(env_label="namespace", server_label="pod"): + conversion_panels ) + # build dashboard return G.Dashboard( title="Kafka cluster - v2", description="Overview of the Kafka cluster", @@ -1223,6 +1306,10 @@ def dashboard(env_label="namespace", server_label="pod"): ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") -dashboard = dashboard(env_label, server_label) + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/kafka-connect-cluster.py b/grafana-dashboards/kafka-connect-cluster.py index 0ff01bd2..472d8796 100644 --- a/grafana-dashboards/kafka-connect-cluster.py +++ b/grafana-dashboards/kafka-connect-cluster.py @@ -66,7 +66,9 @@ def dashboard( ), G.Stat( title="Connect: Online Workers", - dataSource="${DS_PROMETHEUS}", + description="""Kafka Connect online workers returning metrics. + """, + dataSource="Prometheus", targets=[ G.Target( expr="count(kafka_connect_app_info{" @@ -86,7 +88,9 @@ def dashboard( ), G.Stat( title="Connect: Sum of Total Tasks", - dataSource="${DS_PROMETHEUS}", + description="""Number of tasks deployed on Kafka Connect cluster. 
+ """, + dataSource="Prometheus", targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_total_task_count{" @@ -106,7 +110,10 @@ def dashboard( ), G.Stat( title="Connect: Sum of Running Tasks", - dataSource="${DS_PROMETHEUS}", + description="""Number of Running Tasks on the Kafka Connect cluster. + Ideally, this number should be equal to the total number of tasks deployed. + """, + dataSource="Prometheus", targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_running_task_count{" @@ -127,7 +134,10 @@ def dashboard( ), G.Stat( title="Connect: Sum of Paused Tasks", - dataSource="${DS_PROMETHEUS}", + description="""Number of Paused Tasks on the Kafka Connect cluster. + Ideally, this number should be zero, as tasks should be running. + """, + dataSource="Prometheus", targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{" @@ -148,7 +158,11 @@ def dashboard( ), G.Stat( title="Connect: Sum of Failed Tasks", - dataSource="${DS_PROMETHEUS}", + description="""Number of Paused Tasks on the Kafka Connect cluster. + Ideally, this number should be zero, as tasks should be running. + It's recommended alerting when this value is higher than 0. + """, + dataSource="Prometheus", targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{" @@ -160,7 +174,7 @@ def dashboard( ], reduceCalc="last", thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=0, value=0.0, color="green"), G.Threshold(index=1, value=1.0, color="red"), ], gridPos=G.GridPos( @@ -169,7 +183,10 @@ def dashboard( ), G.Stat( title="Connect: Time since last rebalance", - dataSource="${DS_PROMETHEUS}", + description="""Informative value. Time since last rebalance. + When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly. 
+ """, + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{" @@ -192,7 +209,7 @@ def dashboard( ), G.Table( title="Connect Workers", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_app_info{" @@ -332,7 +349,7 @@ def dashboard( ), G.Table( title="Connectors", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connector_info{" @@ -413,7 +430,7 @@ def dashboard( ), G.TimeSeries( title="Tasks Running Ratio", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connector_task_metrics_running_ratio{" @@ -431,7 +448,7 @@ def dashboard( ), G.TimeSeries( title="Rebalance Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{" @@ -449,6 +466,8 @@ def dashboard( ), ] + ## System resources: + ### When updating descriptions on these panels, also update descriptions in other cluster dashboards system_base = hc_base + 4 system_panels = [ G.RowPanel( @@ -457,7 +476,10 @@ def dashboard( ), G.TimeSeries( title="CPU usage", - dataSource="${DS_PROMETHEUS}", + description="""Rate of CPU seconds used by the Java process. + 100% usage represents one core. + If there are multiple cores, the total capacity should be 100% * number_cores.""", + dataSource="Prometheus", targets=[ G.Target( expr="irate(process_cpu_seconds_total{" @@ -479,7 +501,8 @@ def dashboard( ), G.TimeSeries( title="Memory usage", - dataSource="${DS_PROMETHEUS}", + description="""Sum of JVM memory used, without including areas (e.g. 
heap size).""", + dataSource="Prometheus", targets=[ G.Target( expr="sum without(area)(jvm_memory_bytes_used{" @@ -501,7 +524,8 @@ def dashboard( ), G.TimeSeries( title="GC collection", - dataSource="${DS_PROMETHEUS}", + description="""Sum of seconds used by Garbage Collection.""", + dataSource="Prometheus", targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" @@ -527,7 +551,7 @@ def dashboard( worker_inner = [ G.TimeSeries( title="Incoming Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connect_metrics_incoming_byte_rate{" @@ -549,7 +573,7 @@ def dashboard( ), G.TimeSeries( title="Outgoing Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connect_metrics_outgoing_byte_rate{" @@ -571,7 +595,7 @@ def dashboard( ), G.TimeSeries( title="IO Ratio", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connect_metrics_io_ratio{" @@ -593,7 +617,7 @@ def dashboard( ), G.TimeSeries( title="Network IO Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connect_metrics_network_io_rate{" @@ -615,7 +639,7 @@ def dashboard( ), G.TimeSeries( title="Active Connections", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connect_metrics_connection_count{" @@ -636,7 +660,7 @@ def dashboard( ), G.TimeSeries( title="Authentications", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connect_metrics_successful_authentication_rate{" @@ -679,7 +703,7 @@ def dashboard( tasks_inner = [ G.TimeSeries( title="Batch Size (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connector_task_metrics_batch_size_avg{" @@ -701,7 +725,7 @@ def dashboard( ), G.TimeSeries( title="Batch Size (Max.)", - 
dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connector_task_metrics_batch_size_max{" @@ -723,7 +747,7 @@ def dashboard( ), G.TimeSeries( title="Offset commit success %", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connector_task_metrics_offset_commit_success_percentage{" @@ -745,7 +769,7 @@ def dashboard( ), G.TimeSeries( title="Offset commit avg. latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{" @@ -779,7 +803,7 @@ def dashboard( task_errors_inner = [ G.TimeSeries( title="Total Record Failures", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_record_failures{" @@ -800,7 +824,7 @@ def dashboard( ), G.TimeSeries( title="Total Record Error", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_record_errors{" @@ -821,7 +845,7 @@ def dashboard( ), G.TimeSeries( title="Total Records Skipped", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_records_skipped{" @@ -842,7 +866,7 @@ def dashboard( ), G.TimeSeries( title="Total Errors Logged", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_errors_logged{" @@ -863,7 +887,7 @@ def dashboard( ), G.TimeSeries( title="Total Retries", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_retries{" @@ -884,7 +908,7 @@ def dashboard( ), G.TimeSeries( title="Dead Letter Topic Requests", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_task_error_metrics_deadletterqueue_produce_requests{" @@ -917,7 
+941,7 @@ def dashboard( source_inner = [ G.TimeSeries( title="Poll Batch Avg. Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_source_task_metrics_poll_batch_avg_time_ms{" @@ -939,7 +963,7 @@ def dashboard( ), G.TimeSeries( title="Poll Batch Max. Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_source_task_metrics_poll_batch_max_time_ms{" @@ -961,7 +985,7 @@ def dashboard( ), G.TimeSeries( title="Source Record Poll Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_source_task_metrics_source_record_poll_rate{" @@ -983,7 +1007,7 @@ def dashboard( ), G.TimeSeries( title="Source Record Write Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_source_task_metrics_source_record_write_rate{" @@ -1017,7 +1041,7 @@ def dashboard( sink_inner = [ G.TimeSeries( title="Put Batch Avg. Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_sink_task_metrics_put_batch_avg_time_ms{" @@ -1039,7 +1063,7 @@ def dashboard( ), G.TimeSeries( title="Put Batch Max. 
Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_sink_task_metrics_put_batch_max_time_ms{" @@ -1061,7 +1085,7 @@ def dashboard( ), G.TimeSeries( title="Partition Count", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_connect_sink_task_metrics_partition_count{" diff --git a/grafana-dashboards/kafka-consumer.py b/grafana-dashboards/kafka-consumer.py index 3a4a94cc..4697547c 100644 --- a/grafana-dashboards/kafka-consumer.py +++ b/grafana-dashboards/kafka-consumer.py @@ -50,7 +50,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Record Consumed Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -71,7 +71,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Records Lag", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -92,7 +92,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Rebalance Rate per hour", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -115,7 +115,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Failed Rebalance Rate per hour", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -137,7 +137,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Versions", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="count(kafka_consumer_app_info{" @@ -160,7 +160,7 @@ def dashboard(env_label="namespace", server_label="pod"): performance_inner = [ G.TimeSeries( title="Bytes Consumed Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -182,7 +182,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records Consumed 
Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -204,7 +204,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records Lag Max", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -223,10 +223,9 @@ def dashboard(env_label="namespace", server_label="pod"): h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base ), ), - G.TimeSeries( title="Fetch Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -247,7 +246,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -278,7 +277,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Size", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -307,10 +306,9 @@ def dashboard(env_label="namespace", server_label="pod"): h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 1 ), ), - G.TimeSeries( title="Fetch Throttle Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -353,7 +351,7 @@ def dashboard(env_label="namespace", server_label="pod"): group_inner = [ G.TimeSeries( title="Commit Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -374,7 +372,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Join Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -395,7 +393,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sync Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -414,10 +412,9 @@ def 
dashboard(env_label="namespace", server_label="pod"): h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 0 ), ), - G.TimeSeries( title="Commit Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -449,7 +446,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Join Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -481,7 +478,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sync Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -511,10 +508,9 @@ def dashboard(env_label="namespace", server_label="pod"): h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 1 ), ), - G.TimeSeries( title="Heartbeat Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -535,7 +531,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Heartbeat Response Time (Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -557,7 +553,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Last Heartbeat Seconds Ago", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -577,10 +573,9 @@ def dashboard(env_label="namespace", server_label="pod"): h=default_height * 2, w=ts_width, x=ts_width * 2, y=performance_base + 2 ), ), - G.TimeSeries( title="Rebalance Rate Per Hour", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -611,7 +606,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Rebalance Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -642,7 +637,7 @@ def dashboard(env_label="namespace", 
server_label="pod"): ), G.TimeSeries( title="Assigned Partitions", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -675,7 +670,7 @@ def dashboard(env_label="namespace", server_label="pod"): connection_inner = [ G.TimeSeries( title="Connection Count", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -696,7 +691,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Connection Creation Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -718,7 +713,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Connection Close Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -740,7 +735,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO ratio", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -762,7 +757,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO wait ratio", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -784,7 +779,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Select Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -806,7 +801,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO time avg.", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -828,7 +823,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO wait time avg.", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -862,7 +857,7 @@ def dashboard(env_label="namespace", server_label="pod"): per_broker_inner = [ G.TimeSeries( 
title="Incoming Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -886,7 +881,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Outgoing Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -910,7 +905,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -946,7 +941,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -970,7 +965,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Response Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -1006,7 +1001,7 @@ def dashboard(env_label="namespace", server_label="pod"): per_topic_inner = [ G.TimeSeries( title="Bytes Consumed Rate per Topic", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -1016,9 +1011,7 @@ def dashboard(env_label="namespace", server_label="pod"): + '="$env",client_id=~"$client_id", ' + server_label + '=~"$server"})', - legendFormat="{{client_id}}@{{" - + server_label - + "}} <- {{topic}}", + legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", ), ], legendDisplayMode="table", @@ -1030,7 +1023,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records Consumed Rate per Topic", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -1040,9 +1033,7 @@ def dashboard(env_label="namespace", server_label="pod"): + '="$env",client_id=~"$client_id", ' + server_label + '=~"$server"})', - legendFormat="{{client_id}}@{{" - + server_label - + "}} <- 
{{topic}}", + legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", ), ], legendDisplayMode="table", @@ -1052,10 +1043,9 @@ def dashboard(env_label="namespace", server_label="pod"): h=default_height * 2, w=ts_width, x=ts_width * 1, y=per_broker_base + 0 ), ), - G.TimeSeries( title="Fetch Size per Topic", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -1091,7 +1081,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records per Request Avg. per Topic", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -1101,9 +1091,7 @@ def dashboard(env_label="namespace", server_label="pod"): + '="$env",client_id=~"$client_id", ' + server_label + '=~"$server"})', - legendFormat="{{client_id}}@{{" - + server_label - + "}} <- {{topic}}", + legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", ), ], legendDisplayMode="table", diff --git a/grafana-dashboards/kafka-producer.py b/grafana-dashboards/kafka-producer.py index 7fbddc5a..261be8ec 100644 --- a/grafana-dashboards/kafka-producer.py +++ b/grafana-dashboards/kafka-producer.py @@ -49,7 +49,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Record Send Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -70,7 +70,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Error Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -92,7 +92,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Retry Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -115,7 +115,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Versions", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( 
expr="count(kafka_producer_app_info{" @@ -138,7 +138,7 @@ def dashboard(env_label="namespace", server_label="pod"): performance_inner = [ G.TimeSeries( title="Incoming Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -160,7 +160,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Outgoing Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -182,7 +182,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Metadata Age", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -204,7 +204,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -226,7 +226,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request in-flight", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -248,7 +248,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records per Request (avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -269,7 +269,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Send Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -291,7 +291,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Retry Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -313,7 +313,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Error Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ 
-335,7 +335,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Size", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -367,7 +367,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Queue Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -399,7 +399,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Produce Throttle Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -431,7 +431,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Batch Size", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -463,7 +463,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Batch Split Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -485,7 +485,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Compression Rate (avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -519,7 +519,7 @@ def dashboard(env_label="namespace", server_label="pod"): connection_inner = [ G.TimeSeries( title="Connection Count", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -540,7 +540,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Connection Creation Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -562,7 +562,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Connection Close Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -584,7 +584,7 @@ def 
dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO ratio", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -606,7 +606,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO wait ratio", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -628,7 +628,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Select Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -650,7 +650,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO time avg.", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -672,7 +672,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO wait time avg.", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -706,7 +706,7 @@ def dashboard(env_label="namespace", server_label="pod"): per_broker_inner = [ G.TimeSeries( title="Incoming Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -730,7 +730,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Outgoing Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -754,7 +754,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -790,7 +790,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -814,7 +814,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( 
title="Response Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -850,7 +850,7 @@ def dashboard(env_label="namespace", server_label="pod"): per_topic_inner = [ G.TimeSeries( title="Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -872,7 +872,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Compression Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -894,7 +894,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Send Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -916,7 +916,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Retry Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -938,7 +938,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Error Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" diff --git a/grafana-dashboards/kafka-quotas.py b/grafana-dashboards/kafka-quotas.py index 251f768a..654ecef0 100644 --- a/grafana-dashboards/kafka-quotas.py +++ b/grafana-dashboards/kafka-quotas.py @@ -50,7 +50,7 @@ def dashboard(env_label="namespace", server_label="pod"): panels = [ G.TimeSeries( title="Produce Byte Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -68,13 +68,11 @@ def dashboard(env_label="namespace", server_label="pod"): legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="binBps", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=0 - ), + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=0), ), G.TimeSeries( title="Fetch Byte Rate", - 
dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -92,13 +90,11 @@ def dashboard(env_label="namespace", server_label="pod"): legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="binBps", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=0 - ), + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=0), ), G.TimeSeries( title="Request Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -116,15 +112,11 @@ def dashboard(env_label="namespace", server_label="pod"): legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percent", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 2, y=0 - ), + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=0), ), - - G.TimeSeries( title="Produce Throttle Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -142,13 +134,11 @@ def dashboard(env_label="namespace", server_label="pod"): legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=1 - ), + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=1), ), G.TimeSeries( title="Fetch Throttle Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -166,13 +156,11 @@ def dashboard(env_label="namespace", server_label="pod"): legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=1 - ), + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=1), ), G.TimeSeries( title="Request Throttle Time", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -190,9 +178,7 @@ def dashboard(env_label="namespace", server_label="pod"): 
legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 2, y=1 - ), + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=1), ), ] diff --git a/grafana-dashboards/kafka-topics.py b/grafana-dashboards/kafka-topics.py index b4481fe5..021998a0 100644 --- a/grafana-dashboards/kafka-topics.py +++ b/grafana-dashboards/kafka-topics.py @@ -39,7 +39,7 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Messages In/Sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -60,7 +60,7 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Log size", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -81,7 +81,7 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Bytes In/Sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -102,7 +102,7 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Bytes Out/Sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -123,7 +123,7 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Produce Requests/Sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -144,7 +144,7 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Consumer Fetch Requests/Sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="topk(" @@ -213,7 +213,7 @@ def dashboard(env_label="namespace"): offsets_inner = [ G.Table( title="Start Offsets", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_log_log_logstartoffset{" @@ -232,7 +232,7 @@ def dashboard(env_label="namespace"): ), G.Table( title="End Offsets", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( 
expr="kafka_log_log_logendoffset{" diff --git a/grafana-dashboards/ksqldb-cluster.py b/grafana-dashboards/ksqldb-cluster.py index 01caa9e6..ea3c1443 100644 --- a/grafana-dashboards/ksqldb-cluster.py +++ b/grafana-dashboards/ksqldb-cluster.py @@ -59,7 +59,9 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.Stat( title="ksqlDB: Online Servers", - dataSource="${DS_PROMETHEUS}", + description="""ksqlDB online instances returning metrics. + """, + dataSource="Prometheus", targets=[ G.Target( expr="count(ksql_ksql_engine_query_stats_num_active_queries{" @@ -79,7 +81,9 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.Stat( title="ksqlDB: Sum of Active Queries", - dataSource="${DS_PROMETHEUS}", + description="""Number of active queries deployed in the cluster. + """, + dataSource="Prometheus", targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_num_active_queries{" @@ -99,7 +103,10 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.Stat( title="ksqlDB: Sum of Running Queries", - dataSource="${DS_PROMETHEUS}", + description="""Number of running queries deployed in the cluster. + Ideally, this number should be equal to the number of active queries as queries should be running. + """, + dataSource="Prometheus", targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_running_queries{" @@ -120,7 +127,11 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.Stat( title="ksqlDB: Sum of Rebalancing Queries", - dataSource="${DS_PROMETHEUS}", + description="""Number of queries rebalancing in the cluster. + Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute). + It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time. 
+ """, + dataSource="Prometheus", targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_rebalancing_queries{" @@ -140,8 +151,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), ), G.Stat( - title="Connect: Sum of Failed Queries", - dataSource="${DS_PROMETHEUS}", + title="Connect: Sum of Queries Failed", + description="""Number of queries failed in the cluster. + Ideally, this number should be equal zero. + It's recommended to alert if the number of queries failed is higher than 0. + """, + dataSource="Prometheus", targets=[ G.Target( expr="avg(ksql_ksql_engine_query_stats_error_queries{" @@ -162,7 +177,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Cluster Liveness", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="ksql_ksql_engine_query_stats_liveness_indicator{" @@ -179,7 +194,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Messages consumed/sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="ksql_ksql_engine_query_stats_messages_consumed_per_sec{" @@ -199,7 +214,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Messages produced/sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="ksql_ksql_engine_query_stats_messages_produced_per_sec{" @@ -219,6 +234,8 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), ] + ## System resources: + ### When updating descriptions on these panels, also update descriptions in other cluster dashboards system_base = hc_base + 2 system_panels = [ G.RowPanel( @@ -227,7 +244,10 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="CPU usage", - dataSource="${DS_PROMETHEUS}", + description="""Rate of CPU seconds 
used by the Java process. + 100% usage represents one core. + If there are multiple cores, the total capacity should be 100% * number_cores.""", + dataSource="Prometheus", targets=[ G.Target( expr="irate(process_cpu_seconds_total{" @@ -249,7 +269,8 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Memory usage", - dataSource="${DS_PROMETHEUS}", + description="""Sum of JVM memory used, without including areas (e.g. heap size).""", + dataSource="Prometheus", targets=[ G.Target( expr="sum without(area)(jvm_memory_bytes_used{" @@ -271,7 +292,8 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="GC collection", - dataSource="${DS_PROMETHEUS}", + description="""Sum of seconds used by Garbage Collection.""", + dataSource="Prometheus", targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" @@ -297,7 +319,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a queries_inner = [ G.TimeSeries( title="Poll Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_poll_latency_avg{" @@ -319,7 +341,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Poll Latency (Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_poll_latency_max{" @@ -341,7 +363,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Process Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_process_latency_avg{" @@ -363,7 +385,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Process Latency (Max.)", - dataSource="${DS_PROMETHEUS}", 
+ dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_process_latency_max{" @@ -385,7 +407,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Commit Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_commit_latency_avg{" @@ -407,7 +429,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Commit Latency (Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_commit_latency_max{" @@ -429,7 +451,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Punctuate Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_punctuate_latency_avg{" @@ -451,7 +473,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Punctuate Latency (Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_punctuate_latency_max{" @@ -485,7 +507,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a stores_inner = [ G.TimeSeries( title="Put Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_rate{" @@ -507,7 +529,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_latency_avg{" @@ -529,7 +551,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put Latency 
(Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_latency_max{" @@ -551,7 +573,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put if absent Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_if_absent_rate{" @@ -573,7 +595,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put if absent Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_if_absent_latency_avg{" @@ -595,7 +617,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put if absent Latency (Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_if_absent_latency_max{" @@ -617,7 +639,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Fetch Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_fetch_rate{" @@ -639,7 +661,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Fetch Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_fetch_latency_avg{" @@ -661,7 +683,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Fetch Latency (Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_fetch_latency_max{" @@ -683,7 +705,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), 
G.TimeSeries( title="Delete Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_delete_rate{" @@ -705,7 +727,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Delete Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_delete_latency_avg{" @@ -727,7 +749,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Delete Latency (Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_delete_latency_max{" @@ -749,7 +771,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Restore Rate", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_restore_rate{" @@ -771,7 +793,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Restore Latency (Avg.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_restore_latency_avg{" @@ -793,7 +815,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Restore Latency (Max.)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_streams_stream_state_metrics_restore_latency_max{" diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py index 226363b4..233df469 100644 --- a/grafana-dashboards/schema-registry-cluster.py +++ b/grafana-dashboards/schema-registry-cluster.py @@ -38,7 +38,9 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="SR: Online instances", - 
dataSource="${DS_PROMETHEUS}", + description="""Schema Registry online instances returning metrics. + """, + dataSource="Prometheus", targets=[ G.Target( expr="count(kafka_schema_registry_registered_count{" @@ -57,14 +59,15 @@ def dashboard(env_label="namespace", server_label="pod"): ), ), G.Stat( - title="SR: Sum of Registered Schemas", - dataSource="${DS_PROMETHEUS}", + title="SR: Registered Schemas (avg.)", + description="""Average number of registered schemas across the cluster. + """, + dataSource="Prometheus", targets=[ G.Target( expr="avg(kafka_schema_registry_registered_count{" + env_label + '="$env"})', - instant=True, ), ], reduceCalc="last", @@ -76,8 +79,10 @@ def dashboard(env_label="namespace", server_label="pod"): ), ), G.Stat( - title="SR: Sum of Created Schemas by Type", - dataSource="${DS_PROMETHEUS}", + title="SR: Created Schemas by Type (avg.)", + description="""Average number of schemas created, by type. + """, + dataSource="Prometheus", targets=[ G.Target( expr="avg(kafka_schema_registry_schemas_created{" @@ -96,7 +101,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="SR: Sum of Deleted Schemas by Type", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="sum(kafka_schema_registry_schemas_deleted{" @@ -115,7 +120,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="SR: Sum of Active Connections", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", @@ -138,7 +143,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="CPU usage", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="irate(process_cpu_seconds_total{" @@ -156,7 +161,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Memory usage", - dataSource="${DS_PROMETHEUS}", + 
dataSource="Prometheus", targets=[ G.Target( expr="sum without(area)(jvm_memory_bytes_used{" @@ -174,7 +179,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="GC collection", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py index 2925c76f..c0dd71d3 100644 --- a/grafana-dashboards/zookeeper-cluster.py +++ b/grafana-dashboards/zookeeper-cluster.py @@ -45,7 +45,10 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="ZK: Quorum Size", - dataSource="${DS_PROMETHEUS}", + description="""Quorum Size of Zookeeper ensemble. + Count Zookeeper servers with quorum size metric. + """, + dataSource="Prometheus", targets=[ G.Target( expr="count(zookeeper_status_quorumsize{" + env_label + '="$env"})', @@ -60,8 +63,11 @@ def dashboard(env_label="namespace", server_label="pod"): gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 0, y=0), ), G.Stat( - title="ZK: Avg. number of ZNodes", - dataSource="${DS_PROMETHEUS}", + title="ZK: ZNodes (avg.)", + description="""Average size of ZNodes in the cluster. + Getting the node count per server, and averaging the node count. + """, + dataSource="Prometheus", targets=[ G.Target( expr="avg(zookeeper_inmemorydatatree_nodecount{" @@ -76,22 +82,34 @@ def dashboard(env_label="namespace", server_label="pod"): gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 1, y=0), ), G.Stat( - title="ZK: Sum of number of Alive Connections", - dataSource="${DS_PROMETHEUS}", + title="ZK: Connections used", + description="""Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host. 
+ If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened. + """, + dataSource="Prometheus", targets=[ G.Target( - expr="sum(zookeeper_numaliveconnections{" + env_label + '="$env"})', + expr="zookeeper_numaliveconnections{" + + env_label + + '="$env"} / zookeeper_maxclientcnxnsperhost{' + + env_label + + '="$env"}', ), ], reduceCalc="last", thresholds=[ - G.Threshold(index=0, value=0.0, color="blue"), + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=0.6, color="yellow"), + G.Threshold(index=2, value=0.8, color="red"), ], + format="percentunit", gridPos=G.GridPos(h=default_height, w=stat_width, x=stat_width * 2, y=0), ), G.Stat( title="ZK: Sum of watchers", - dataSource="${DS_PROMETHEUS}", + description="""Sum of client watchers subscribed to changes on the ZNodes. + """, + dataSource="Prometheus", targets=[ G.Target( expr="sum(zookeeper_inmemorydatatree_watchcount{" @@ -107,7 +125,11 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="ZK: Outstanding Requests", - dataSource="${DS_PROMETHEUS}", + description="""Number of requests waiting for processing (queued). + If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked. + It could mean that there is not enough resources to cope with the number of requests. 
+ """, + dataSource="Prometheus", targets=[ G.Target( expr="zookeeper_outstandingrequests{" + env_label + '="$env"}', @@ -120,6 +142,11 @@ def dashboard(env_label="namespace", server_label="pod"): legendCalcs=["max", "last"], legendPlacement="right", gridPos=G.GridPos(h=default_height, w=ts_width, x=stat_width * 4, y=0), + thresholds=[ + G.Threshold(index=0, value=0.0, color="green"), + G.Threshold(index=1, value=1.0, color="yellow"), + G.Threshold(index=2, value=10.0, color="red"), + ], ), ] @@ -130,7 +157,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="CPU usage", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="irate(process_cpu_seconds_total{" @@ -148,7 +175,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Memory usage", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="sum without(area)(jvm_memory_bytes_used{" @@ -166,7 +193,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="GC collection", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" @@ -189,7 +216,7 @@ def dashboard(env_label="namespace", server_label="pod"): latency_inner = [ G.TimeSeries( title="ZK: Request Latency (Minimum)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="zookeeper_minrequestlatency{" @@ -205,7 +232,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="ZK: Request Latency (Average)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="zookeeper_avgrequestlatency{" @@ -221,7 +248,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="ZK: Request Latency (Maximum)", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="zookeeper_maxrequestlatency{" 
@@ -249,7 +276,7 @@ def dashboard(env_label="namespace", server_label="pod"): kafka_inner = [ G.TimeSeries( title="Kafka: Request Latency", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{" @@ -267,7 +294,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Kafka: Sync Connections/sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{" @@ -286,7 +313,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Kafka: Expired Connections/sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_server_sessionexpirelistener_zookeeperexpirespersec{" @@ -305,7 +332,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Kafka: Disconnected Connections/sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{" @@ -324,7 +351,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Kafka: Auth Failures on Connections/sec", - dataSource="${DS_PROMETHEUS}", + dataSource="Prometheus", targets=[ G.Target( expr="kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{" From ae6b23949155c0f9b1140d6c7f5ca854f80c6407 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Tue, 12 Jul 2022 14:23:33 +0100 Subject: [PATCH 24/28] feat: modify existing dashboards --- .../grafana/confluent-platform.json | 3324 +++++---- .../grafana/kafka-cluster.json | 5985 ++++++++--------- .../grafana/kafka-connect-cluster.json | 5710 ++++++---------- .../grafana}/kafka-consumer.json | 0 .../grafana}/kafka-producer.json | 0 .../grafana}/kafka-quotas.json | 0 .../grafana/kafka-topics.json | 1062 +-- .../grafana/ksqldb-cluster.json | 
3369 +++++----- .../grafana/schema-registry-cluster.json | 1097 ++- .../grafana/zookeeper-cluster.json | 1956 ++++-- grafana-dashboards/Makefile | 4 +- .../cfk/confluent-platform.json | 2681 -------- grafana-dashboards/cfk/kafka-cluster.json | 5537 --------------- .../cfk/kafka-connect-cluster.json | 4184 ------------ grafana-dashboards/cfk/kafka-topics.json | 1085 --- grafana-dashboards/cfk/ksqldb-cluster.json | 3683 ---------- .../cfk/schema-registry-cluster.json | 937 --- grafana-dashboards/cfk/zookeeper-cluster.json | 1914 ------ .../default/confluent-platform.json | 2681 -------- grafana-dashboards/default/kafka-cluster.json | 5537 --------------- .../default/kafka-connect-cluster.json | 4184 ------------ .../default/kafka-consumer.json | 4582 ------------- .../default/kafka-producer.json | 4087 ----------- grafana-dashboards/default/kafka-quotas.json | 779 --- grafana-dashboards/default/kafka-topics.json | 1085 --- .../default/ksqldb-cluster.json | 3683 ---------- .../default/schema-registry-cluster.json | 937 --- .../default/zookeeper-cluster.json | 1914 ------ .../dashboards/confluent-platform.json | 3174 +++++---- .../dashboards/kafka-cluster.json | 5881 ++++++++-------- .../dashboards/kafka-connect-cluster.json | 5706 ++++++---------- .../dashboards/kafka-consumer.json | 4574 ++++++++++--- .../dashboards/kafka-producer.json | 3841 +++++++---- .../provisioning/dashboards/kafka-quotas.json | 603 +- .../provisioning/dashboards/kafka-topics.json | 1058 +-- .../dashboards/ksqldb-cluster.json | 3104 ++++----- .../dashboards/schema-registry-cluster.json | 1093 ++- .../dashboards/zookeeper-cluster.json | 1956 ++++-- 38 files changed, 28251 insertions(+), 74736 deletions(-) rename {grafana-dashboards/cfk => cfk-prometheus-grafana/grafana}/kafka-consumer.json (100%) rename {grafana-dashboards/cfk => cfk-prometheus-grafana/grafana}/kafka-producer.json (100%) rename {grafana-dashboards/cfk => cfk-prometheus-grafana/grafana}/kafka-quotas.json (100%) delete mode 100644 
grafana-dashboards/cfk/confluent-platform.json delete mode 100644 grafana-dashboards/cfk/kafka-cluster.json delete mode 100644 grafana-dashboards/cfk/kafka-connect-cluster.json delete mode 100644 grafana-dashboards/cfk/kafka-topics.json delete mode 100644 grafana-dashboards/cfk/ksqldb-cluster.json delete mode 100644 grafana-dashboards/cfk/schema-registry-cluster.json delete mode 100644 grafana-dashboards/cfk/zookeeper-cluster.json delete mode 100644 grafana-dashboards/default/confluent-platform.json delete mode 100644 grafana-dashboards/default/kafka-cluster.json delete mode 100644 grafana-dashboards/default/kafka-connect-cluster.json delete mode 100644 grafana-dashboards/default/kafka-consumer.json delete mode 100644 grafana-dashboards/default/kafka-producer.json delete mode 100644 grafana-dashboards/default/kafka-quotas.json delete mode 100644 grafana-dashboards/default/kafka-topics.json delete mode 100644 grafana-dashboards/default/ksqldb-cluster.json delete mode 100644 grafana-dashboards/default/schema-registry-cluster.json delete mode 100644 grafana-dashboards/default/zookeeper-cluster.json diff --git a/cfk-prometheus-grafana/grafana/confluent-platform.json b/cfk-prometheus-grafana/grafana/confluent-platform.json index d574b808..d94f8840 100644 --- a/cfk-prometheus-grafana/grafana/confluent-platform.json +++ b/cfk-prometheus-grafana/grafana/confluent-platform.json @@ -1,80 +1,103 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the main health-check metrics from Confluent 
Platform components.", "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 11, - "iteration": 1647519271805, + "gnetId": null, + "hideControls": false, + "id": null, "links": [], - "liveNow": false, "panels": [ { + "cacheTimeout": null, "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Zookeeper", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper cluster", + "transformations": [], + "transparent": false, "type": "row" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Quorum Size of Zookeeper ensemble", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 3 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, 
+ "yaxis": "left" } ] }, @@ -83,69 +106,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "count(zookeeper_status_quorumsize{namespace=\"$ns\"})", + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Zookeeper nodes online", + "timeFrom": null, + "timeShift": null, + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } 
] }, @@ -154,156 +189,180 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$ns\"})", + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Number of ZNodes", + "timeFrom": null, + "timeShift": null, + "title": "ZK: ZNodes (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Number of Alive Connections", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + 
"mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 200 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_numaliveconnections{namespace=\"$ns\"})", + "datasource": null, + "expr": "zookeeper_numaliveconnections{namespace=\"$env\"} / zookeeper_maxclientcnxnsperhost{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Alive Connections", + "timeFrom": null, + "timeShift": null, + "title": "ZK: Connections used", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Number of Watchers", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of 
client watchers subscribed to changes on the ZNodes.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 1000 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -312,163 +371,246 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 1 + "y": 0 }, - "id": 22, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$ns\"})", + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Number of Watchers", + "timeFrom": null, + "timeShift": null, + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, "type": "stat" }, { - "description": "Number of Alive Connections", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", 
+ "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 200 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 8, "x": 16, - "y": 1 + "y": 0 }, - "id": 24, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "last" ], - "fields": "", 
- "values": false + "displayMode": "table", + "placement": "right" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "exemplar": false, - "expr": "zookeeper_outstandingrequests{namespace=\"$ns\"}", - "instant": true, + "datasource": null, + "expr": "zookeeper_outstandingrequests{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{pod}} ({{member_type}})", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Outstanding Requests", - "type": "stat" + "timeFrom": null, + "timeShift": null, + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Kafka Cluster", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka cluster", + "transformations": [], + "transparent": false, "type": "row" }, { - "description": "Number of active controllers in the cluster.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Count of brokers available (online).\n This value is referential and should not be used for alerting.\n ", + "editable": true, + "error": 
false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "#e5ac0e", - "value": 2 - }, - { - "color": "#bf1b00" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -477,85 +619,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 6 + "y": 1 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "vertical", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "value_and_name" + "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "exemplar": false, - "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$ns\"} > 0", + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Active Controllers", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - 
}, - "description": "Number of Brokers Online", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 2 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -564,82 +702,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 6 + "y": 1 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.4.2", - "repeatDirection": "h", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$ns\"})", + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$env\"} > 0", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + 
"legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Brokers Online", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Partitions that are online", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of Topic partitions across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 0 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -648,81 +785,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 6 + "y": 1 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$ns\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$env\"})", "format": 
"time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Online Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 5 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -731,82 +876,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 6 + "y": 1 }, - "id": 10, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, 
"options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$ns\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Replicated Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Number of partitions under min insync replicas.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n It's recommended alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "color": "green", + "index": 0, + 
"line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 5 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -815,82 +967,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 16, - "y": 6 + "y": 1 }, + "height": null, + "hideTimeOverride": false, "id": 12, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$ns\"})", + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Min ISR Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-MinISR Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Number of partitions that dont have an active leader and are hence not writable or readable.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n It's recommended alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": 
"thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "#ef843c", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 1 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -899,451 +1058,19 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 20, - "y": 6 + "y": 1 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, "links": [], "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$ns\"})", - "format": "time_series", - "instant": true, - "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" - } - ], - "title": "Offline Partitions Count", - "type": "stat" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 26, - "panels": [], - "title": "Shema Registry", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 11 - }, - "id": 30, - "options": { - 
"colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "exemplar": true, - "expr": "count(kafka_schema_registry_registered_count{namespace=\"$ns\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Schema Registry Instances", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 11 - }, - "id": 28, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_schema_registry_registered_count{namespace=\"$ns\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Schemas registered", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 11 - }, - "id": 33, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "exemplar": true, - 
"expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Schemas deleted", - "type": "stat" - }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "id": 37, - "panels": [], - "repeat": "cluster", - "title": "Kafka Connect ($kafka_connect_app) ", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 16 - }, - "id": 39, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "exemplar": true, - "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",app=~\"$kafka_connect_app\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Connect worker instances", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 16 - }, - "id": 48, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ 
- { - "exemplar": true, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",app=~\"$kafka_connect_app\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Tasks Total", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 16 - }, - "id": 41, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",app=~\"$kafka_connect_app\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Tasks Running", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 16 - }, - "id": 43, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", @@ -1351,216 +1078,118 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - 
"expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",instance=~\"$instance\",app=~\"$kafka_connect_app\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Tasks Paused", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Offline Partitions", "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] + "steps": [] } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 16 - }, - "id": 45, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",instance=~\"$instance\",app=~\"$kafka_connect_app\",connector=~\"$connector\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Tasks Failed", - "transformations": [], - "type": "stat" - }, - { - "description": "Time since last rebalance", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [ - { - "options": { 
- "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 16 - }, - "id": 47, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "repeat": "instance", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "exemplar": true, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",app=~\"$kafka_connect_app\"} >= 0", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" } - ], - "title": "Time since last rebalance ", - "type": "stat" - }, - { - "collapsed": false, + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 20 + "y": 2 }, - "id": 52, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "repeat": "clusterid", - "title": "ksqlDB Cluster ($ksqldb_cluster_id) ", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Schema Registry cluster", + "transformations": [], + "transparent": false, "type": "row" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Average number of active queries per server.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Schema Registry online instances returning 
metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 1 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" } ] }, @@ -1569,76 +1198,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 21 + "y": 2 }, - "id": 50, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "ksqlDB instances", + "timeFrom": null, + "timeShift": null, + "title": "SR: Online 
instances", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Average number of active queries per server.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of registered schemas across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1647,76 +1281,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 21 + "y": 2 }, - "id": 53, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "instant": true, + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": 
"", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Active Queries", + "timeFrom": null, + "timeShift": null, + "title": "SR: Registered Schemas (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of created queries", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of schemas created, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 800 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1725,75 +1364,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 21 + "y": 2 }, - "id": 55, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$env\"}) by (schema_type)", + "format": "time_series", + 
"hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Running Queries", + "timeFrom": null, + "timeShift": null, + "title": "SR: Schemas Created by Type (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of rebalancing queries", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of schemas deleted, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1802,271 +1447,1200 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 21 + "y": 2 }, - "id": 57, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", 
ksql_cluster=\"$ksqldb_cluster_id\"})", + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Rebalancing Queries", + "timeFrom": null, + "timeShift": null, + "title": "SR: Schemas Deleted by Type (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Number of error query", + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Kafka Connect online workers returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": 
"auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ { - "options": { - "match": "null", - "result": { - "text": "N/A" - } + "datasource": null, + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] }, - "type": "special" + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" 
+ } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] }, - { - "color": "#d44a3a", - "value": 2 - } - ] + "unit": "none" + }, + "overrides": [] }, - "unit": "none" + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + "type": "stat" }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - 
"y": 21 - }, - "id": 59, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of Failed Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent":
false, + "type": "stat" }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "interval": "", - "legendFormat": "", - "refId": "A" + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Informative value. Time since last rebalance.\n When this value is continuously and repeatedly low, it means some connectors are failing and rebalancing is triggered constantly.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$env\",app=~\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" } ], - "title": "Queries in
Error State", - "type": "stat" + "repeat": "connect_cluster", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka Connect cluster: $connect_cluster", + "transformations": [], + "transparent": false, + "type": "row" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of not running queries", + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 21 + "h": 1, + "w": 24, + "x": 0, + "y": 4 }, - "id": 61, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, "links": [], "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "ksqlDB online instances returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": 
false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ { - "exemplar": true, - "expr": "sum(ksql_ksql_engine_query_stats_not_running_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "interval": "", - "legendFormat": "", - "refId": "A" + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of active queries deployed in the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + 
"justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": 
"sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": 
"", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Queries Failed", + "transformations": [], + "transparent": false, + "type": "stat" }
], - "title": "Stopped Queries", - "type": "stat" + "repeat": "ksqldb_cluster", + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB cluster: $ksqldb_cluster", + "transformations": [], + "transparent": false, + "type": "row" } ], - "refresh": "1m", - "schemaVersion": 35, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb" + ], "templating": { "list": [ { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "confluent", - "value": "confluent" + "tags": [], + "text": null, + "value": null }, - "definition": "label_values(namespace)", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], - "query": { - "query": "label_values(namespace)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(namespace)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false }, { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "connect", - "value": "connect" + "tags": [], + "text": null, + "value": null }, - "definition": "label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\"}, app)", - "hide": 0, + "datasource": "${DS_PROMETHEUS}", + "hide": true, "includeAll": false, - "label": "Connect cluster", + "label": "Kafka Connect cluster", "multi": false, - "name": "kafka_connect_app", + "name": "connect_cluster", "options": [], - "query": { - "query": 
"label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\"}, app)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\"}, app)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false }, { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "confluent.ksqldb_", - "value": "confluent.ksqldb_" + "tags": [], + "text": null, + "value": null }, - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$ns\"},ksql_cluster)", - "hide": 0, + "datasource": "${DS_PROMETHEUS}", + "hide": true, "includeAll": false, - "label": "ksqlDB Cluster ID", + "label": "ksqlDB cluster", "multi": false, - "name": "ksqldb_cluster_id", + "name": "ksqldb_cluster", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$ns\"},ksql_cluster)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$env\"}, app)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false } ] }, @@ -2074,10 +2648,34 @@ "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", - "title": "Confluent Platform overview", - "uid": "JiqnBMNnz", - "version": 2, - "weekStart": "" -} \ No newline at end of file + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": 
"Confluent Platform overview - v2", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/kafka-cluster.json b/cfk-prometheus-grafana/grafana/kafka-cluster.json index 09734d06..3db845de 100644 --- a/cfk-prometheus-grafana/grafana/kafka-cluster.json +++ b/cfk-prometheus-grafana/grafana/kafka-cluster.json @@ -1,75 +1,87 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Kafka resource usage and throughput", + "description": "Overview of the Kafka cluster", "editable": true, - "gnetId": 721, - "graphTooltip": 0, - "id": 6, - "iteration": 1647427255896, + "gnetId": null, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Healthcheck", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Overview", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of active controllers in the cluster.", + "datasource": "${DS_PROMETHEUS}", 
+ "description": "Count of brokers available (online).\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "#e5ac0e", - "value": 2 - }, - { - "color": "#bf1b00" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -78,79 +90,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "vertical", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "value_and_name" + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$ns\",pod=~\"$pod\"} > 0", + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Active Controllers", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": 
"Prometheus", - "description": "Number of Brokers Online", + "datasource": "${DS_PROMETHEUS}", + "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 2 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -159,81 +173,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.1.3", "repeat": null, - "repeatDirection": "h", + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$env\"} > 0", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + 
"metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Brokers Online", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Active Controller", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Partitions that are online", + "datasource": "${DS_PROMETHEUS}", + "description": "\n Number of partitions where the preferred replica is not the leader.\n Usually, this number is 0.\n Restarting nodes could cause this value to change, but when reassigning happens the value stabilizes.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 0 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -242,78 +256,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 4, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr":
"sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Online Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Preferred Replica Imbalance", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "", + "datasource": "${DS_PROMETHEUS}", + "description": "Number of topics in the cluster.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 - }, - { - "color": "#d44a3a" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -322,266 +339,247 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, "y": 1 }, - "id": 33, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": 
"sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{namespace=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Preferred Replica Imbalance", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Topics", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of requests per second rated over a 5 min. period.\n Gives an idea of the processing load in the cluster.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "Bps" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, + "h": 5, + "w": 4, "x": 16, - "y": 1 + "y": 0 }, - "id": 84, + "height": null, + "hideTimeOverride": false, 
+ "id": 6, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Bytes in", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "Bytes out", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "B", - "step": 4 + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Broker network throughput", - "type": "timeseries" + "title": "Kafka: Rate of Requests/Sec", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of log sizes per broker.\n This must be compared with the total storage space available in the brokers.", + "editable": true, + "error": false, 
"fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#bf1b00", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 20, + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 7, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_log_log_size{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Replicated Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Log Size", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - 
"description": "Number of partitions under min insync replicas.", + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of Topic partitions across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#bf1b00", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -590,80 +588,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 4, - "y": 5 + "x": 0, + "y": 1 }, - "id": 32, + "height": null, + "hideTimeOverride": false, + "id": 8, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Min ISR Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + 
"transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of partitions that dont have an active leader and are hence not writable or readable.", + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "#ef843c", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 1 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -672,78 +679,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 8, - "y": 5 + "x": 4, + "y": 1 }, - "id": 22, + "height": null, + "hideTimeOverride": false, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 
null, "targets": [ { - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Offline Partitions Count", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Unclean leader election rate", + "datasource": "${DS_PROMETHEUS}", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n We recommend alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#d44a3a" + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -752,82 +770,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 12, - "y": 5 + "x": 8, + "y": 1 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 10, "interval": 
null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$env\",pod=~\"$broker\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Unclean Leader Election Rate", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-MinISR Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 31, - "panels": [], - "title": "Request rate", - "type": "row" - }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Produce request rate.", + "datasource": "${DS_PROMETHEUS}", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n We recommend alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": 
null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -836,184 +861,272 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 10 + "x": 12, + "y": 1 }, - "id": 93, + "height": null, + "hideTimeOverride": false, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\"}[5m]))", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "All Request Per Sec", + "title": "Kafka: Sum of Offline Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Produce request rate.", + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of bytes in per second rated over a 5 min. 
period.\n Gives an idea of the incoming throughput handle by the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 4, - "y": 10 + "x": 16, + "y": 1 }, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 12, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"Produce\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce Request Per Sec", + "title": "Kafka: Bytes In/Sec", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Fetch request rate.", + "datasource": "${DS_PROMETHEUS}", + "description": "Sum 
of bytes out per second rated over a 5 min. period.\n Gives an idea of the outgoing throughput handle by the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 8, - "y": 10 + "x": 20, + "y": 1 }, - "id": 37, + "height": null, + "hideTimeOverride": false, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"FetchConsumer\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer Fetch Request Per Sec", + "title": "Kafka: Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + "description": null, + 
"editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System resources", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1035,14 +1148,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, "showPoints": "auto", "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -1050,33 +1161,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "steps": [] + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 10 + "h": 10, + "w": 8, + "x": 0, + "y": 2 }, - "id": 122, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1085,1544 +1195,268 @@ "mode": "single" } }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"rate(kafka_network_requestmetrics_errorspersec{error!=\"NONE\"}[5m])", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{error}} @ {{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Errors", + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Fetch request rate.", + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 14 + "h": 10, + "w": 8, + "x": 8, + "y": 2 }, - "id": 94, + "height": null, + "hideTimeOverride": false, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - 
"graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"Fetch\"}[5m]))", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Broker Fetch Request Per Sec", - "type": "stat" + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Offset Commit request rate.", + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], 
+ }, + "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 14 + "h": 10, + "w": 8, + "x": 16, + "y": 2 }, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"OffsetCommit\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Offset Commit Request Per Sec", - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Metadata request rate.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 14 - 
}, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$ns\",pod=~\"$pod\",request=\"Metadata\"}[5m]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Metadata Request Per Sec", - "type": "stat" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 40, - "panels": [], - "title": "System", - "type": "row" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Cores", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 19 - }, - "id": 27, - "links": [], - "options": { - "legend": { - "calcs": 
[ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\",pod=~\"$pod\"}[5m])*100", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "process_cpu_secondspersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Memory", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 19 - }, - "id": 2, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",pod=~\"$pod\"})", - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "jvm_memory_bytes_used", - "refId": "A", - "step": 4 - }, - { - "expr": "jvm_memory_bytes_max{area=\"heap\",namespace=\"$ns\",pod=~\"$pod\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM Memory Used", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "% time in GC", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 19 - }, - "id": 3, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",pod=~\"$pod\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "jvm_gc_collection_seconds_sum", - "refId": "A", - "step": 4 - } - ], 
- "timeFrom": null, - "timeShift": null, - "title": "Time spent in GC", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 29, - "panels": [ - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Messages/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 24 - }, - "id": 4, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$ns\",topic!=\"\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_messagesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": 
"line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 24 - }, - "id": 5, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, 
- "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 24 - }, - "id": 6, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",topic!=\"\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Messages/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 32 - }, - "id": 10, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", 
- "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "kafka_server_brokertopicmetrics_messagesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In Per Broker", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 32 - }, - "id": 7, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": 
"kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In Per Broker", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 32 - }, - "id": 9, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$ns\",pod=~\"$pod\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out Per Broker", - "type": "timeseries" - } - ], - "title": "Throughput In/Out", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 44, - "panels": [ - { - "datasource": "Prometheus", - "description": "Average fraction of time the network 
processor threads are idle. Values are between 0 (all resources are used) and 100 (all resources are available)\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$ns\",pod=~\"$pod\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Processor Avg Usage Percent", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "Average fraction of time the request handler threads are idle. 
Values are between 0 (all resources are used) and 100 (all resources are available).\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 25 - }, - "id": 25, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "1 - kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$ns\",pod=~\"$pod\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Handler Avg Percent", - "type": "timeseries" - } - ], - "title": "Thread utilization", - "type": "row" + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 29 + "y": 3 }, - "id": 86, + "height": null, + 
"hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": "Prometheus", - "description": "Latency in millseconds for ZooKeeper requests from broker.\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 88, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Zookeeper Request Latency", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - 
"legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 26 - }, - "id": 92, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace=\"$ns\",pod=~\"$pod\"}", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Zookeeper connections per sec", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of messages into topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2633,7 +1467,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2644,51 +1478,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": 
[], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 0, - "y": 35 + "y": 3 }, - "id": 89, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2697,24 +1525,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace=\"$ns\",pod=~\"$pod\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper expired connections per sec", + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of bytes into topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2725,7 +1567,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2736,51 +1578,45 @@ "lineWidth": 1, "pointSize": 5, 
"scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 8, - "y": 35 + "y": 3 }, - "id": 90, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2789,24 +1625,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace=\"$ns\",pod=~\"$pod\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper disconnect per sec", + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of bytes out of topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": 
{ "color": { @@ -2817,7 +1667,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2828,51 +1678,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "log": 2, + "type": "linear" }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 16, - "y": 35 + "y": 3 }, - "id": 91, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2881,39 +1725,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace=\"$ns\",pod=~\"$pod\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper auth failures per sec", + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": 
"timeseries" } ], - "title": "Zookeeper", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 4 }, - "id": 82, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": "Prometheus", - "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). 
No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention.The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Percent of time the network thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2924,7 +1809,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2935,51 +1820,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 27 + "y": 4 }, - "id": 80, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2988,23 +1867,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": 
"rate(kafka_server_replicamanager_isrshrinkspersec{namespace=\"$ns\",pod=~\"$pod\"}[5m])", + "datasource": null, + "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$env\",pod=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IsrShrinks per Sec", + "title": "Network processor usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). 
No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention.The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Percent of time the IO thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3015,7 +1909,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3026,51 +1920,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 27 + "h": 10, + "w": 8, + "x": 8, + "y": 4 }, - "id": 83, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3079,38 +1967,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": 
"rate(kafka_server_replicamanager_isrexpandspersec{namespace=\"$ns\",pod=~\"$pod\"}[5m])", + "datasource": null, + "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$env\",pod=~\"$broker\"}", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IsrExpands per Sec", + "title": "Request processor (IO) usage", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Isr Shrinks / Expands", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread utilization", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 5 }, - "id": 53, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Requests per second rated over a 5 minutes period.\n Includes API call and version.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3121,7 +2051,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3132,50 +2062,48 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - 
"spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 28 + "y": 5 }, - "id": 55, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3184,21 +2112,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_log_log_size{namespace=\"$ns\",pod=~\"$pod\"}) by (topic)", - "legendFormat": "{{topic}}", - "refId": "A" + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{request}}(v{{version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Log size per Topic", + "title": "Requests rates", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Request Errors per second rated over a 5 minutes period.\n Includes API call and version.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3209,7 +2154,7 @@ "axisPlacement": "auto", 
"barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3220,50 +2165,48 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 28 + "h": 10, + "w": 8, + "x": 8, + "y": 5 }, - "id": 56, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3272,37 +2215,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_log_log_size{namespace=\"$ns\",pod=~\"$pod\"}) by (instance)", - "legendFormat": "{{instance}}", - "refId": "A" + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace=\"$env\",pod=~\"$broker\",error!=\"NONE\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{error}}@{{request}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Log size per Broker", + "title": "Error rates", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Logs size", + "repeat": 
null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request rates", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 6 }, - "id": 58, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of connections count across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3313,7 +2299,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3324,75 +2310,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 29 + "y": 6 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + 
"id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - RequestQueueTimeMs", + "title": "Sum of Connections alive per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. 
In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of rate of connections created across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3403,7 +2399,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3414,75 +2410,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 29 + "h": 10, + "w": 8, + "x": 8, + "y": 6 }, - "id": 61, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", "hide": false, - "legendFormat": 
"{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - LocalTimeMs", + "title": "Sum of Connections creation rate per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of rate of connections closed across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3493,7 +2499,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3504,50 +2510,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 38 + "x": 16, + "y": 6 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": 
null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3556,23 +2557,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - RemoteTimeMs", + "title": "Sum of Connections close rate per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of connections count across cluster by listeners", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3583,7 +2599,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3594,50 +2610,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, 
"thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 38 + "x": 0, + "y": 7 }, - "id": 63, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3646,23 +2657,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - ResponseQueueTimeMs", + "title": "Sum of Connections alive per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. 
If the network buffer gets full, Kafka will block.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of rate of connections created across cluster by listener", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3673,7 +2699,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3684,51 +2710,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 16, - "y": 38 + "x": 8, + "y": 7 }, - "id": 64, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3737,38 +2757,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + 
"intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - ResponseSendTimeMs", + "title": "Sum of Connections creation rate per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Producer Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 68, - "panels": [ + }, { - "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of rate of connections closed across cluster by listener", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3779,7 +2799,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3790,76 +2810,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 30 + "h": 10, + "w": 8, + "x": 16, + "y": 7 }, - "id": 69, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - RequestQueueTimeMs", + "title": "Sum of Connections close rate per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. 
One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Rate of ISR shrinks per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3870,7 +2941,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3881,75 +2952,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 30 + "h": 10, + "w": 8, + "x": 0, + "y": 8 }, - "id": 70, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + 
"datasource": null, + "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{namespace=\"$env\",pod=~\"$broker\"}[5m])", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - LocalTimeMs", + "title": "Rate of ISR Shrinks/sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Rate of ISR expands per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3960,7 +3041,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3971,50 +3052,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - 
"value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 0, - "y": 39 + "x": 8, + "y": 8 }, - "id": 71, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4023,23 +3099,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrexpandspersec{namespace=\"$env\",pod=~\"$broker\"}[5m])", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - RemoteTimeMs", + "title": "Rate of ISR Expands/sec", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "In-Sync Replicas", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, + "panels": [ { - "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4050,7 +3183,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4061,51 +3194,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 8, - "y": 39 + "x": 0, + "y": 9 }, - "id": 72, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4114,23 +3241,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - ResponseQueueTimeMs", + "title": "Produce: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. If the network buffer gets full, Kafka will block.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4141,7 +3283,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4152,51 +3294,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 16, - "y": 39 + "x": 8, + "y": 9 }, - "id": 
73, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4205,38 +3341,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - ResponseSendTimeMs", + "title": "Produce: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Consumer Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 34 - }, - "id": 66, - "panels": [ + }, { - "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. 
The request queue size should match the number of connections.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4247,7 +3383,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4258,76 +3394,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 31 + "h": 10, + "w": 8, + "x": 16, + "y": 9 }, - "id": 74, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": 
"{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - RequestQueueTimeMs", + "title": "Produce: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4338,7 +3483,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4349,76 +3494,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 31 + "h": 10, + "w": 8, + "x": 0, + "y": 10 }, - "id": 75, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], 
+ "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - LocalTimeMs", + "title": "Produce: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. 
Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4429,7 +3583,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4440,50 +3594,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 0, - "y": 40 + "x": 8, + "y": 10 }, - "id": 76, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4492,23 +3641,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", -
"refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - RemoteTimeMs", + "title": "Produce: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Producer", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4519,7 +3725,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4530,50 +3736,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": 
false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 8, - "y": 40 + "x": 0, + "y": 11 }, - "id": 77, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4582,23 +3783,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - ResponseQueueTimeMs", + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. 
If the network buffer gets full, Kafka will block.\n", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4609,7 +3825,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4620,51 +3836,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 16, - "y": 40 + "x": 8, + "y": 11 }, - "id": 78, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4673,37 +3883,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$ns\",pod=~\"$pod\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - ResponseSendTimeMs", + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Fetch Follower Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 35 - }, - "id": 102, - "panels": [ + }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4714,7 +3925,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4725,50 +3936,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 + "h": 10, + "w": 8, + "x": 16, + "y": 11 }, - "id": 98, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" 
@@ -4777,22 +3983,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_count{ namespace=\"$ns\", pod=~\"$pod\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections count per listener", + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4803,7 +4025,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4814,50 +4036,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 + "h": 10, + "w": 8, + "x": 0, + "y": 12 
}, - "id": 100, + "height": null, + "hideTimeOverride": false, + "id": 48, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4866,22 +4083,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_count{ namespace=\"$ns\", pod=~\"$pod\"}) by (instance)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections count per broker", + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4892,7 +4125,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4903,50 +4136,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [],
"mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 + "h": 10, + "w": 8, + "x": 8, + "y": 12 }, - "id": 104, + "height": null, + "hideTimeOverride": false, + "id": 49, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4955,22 +4183,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{ namespace=\"$ns\", pod=~\"$pod\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections creation rate per listener", + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Consumer Fetch", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": 
[] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 50, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4981,7 +4267,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4992,50 +4278,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 + "h": 10, + "w": 8, + "x": 0, + "y": 13 }, - "id": 106, + "height": null, + "hideTimeOverride": false, + "id": 51, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5044,22 +4325,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{ namespace=\"$ns\", pod=~\"$pod\"}) by (instance)", + "datasource": null, + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections creation rate per instance", + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5070,7 +4367,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5081,50 +4378,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 48 + "h": 10, + "w": 8, + "x": 8, + "y": 13 }, - "id": 108, + "height": null, + "hideTimeOverride": false, + "id": 52, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], 
"displayMode": "table", "placement": "bottom" @@ -5133,22 +4425,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{ namespace=\"$ns\", pod=~\"$pod\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections close rate per listener", + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5159,7 +4467,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5170,50 +4478,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - 
"x": 12, - "y": 48 + "h": 10, + "w": 8, + "x": 16, + "y": 13 }, - "id": 110, + "height": null, + "hideTimeOverride": false, + "id": 53, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5222,23 +4525,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{ namespace=\"$ns\", pod=~\"$pod\"}) by (instance)", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections close rate per instance", + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "Tracks the amount of time Acceptor is blocked from accepting connections. 
See KIP-402 for more details.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5249,7 +4567,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5260,51 +4578,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percent" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 56 + "y": 14 }, - "id": 112, + "height": null, + "hideTimeOverride": false, + "id": 54, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5313,22 +4625,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_acceptor_acceptorblockedpercent{ namespace=\"$ns\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} - {{listener}}", - "refId": "A" +
"intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Acceptor Blocked Percentage", + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5339,7 +4667,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5350,50 +4678,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 + "h": 10, + "w": 8, + "x": 8, + "y": 14 }, - "id": 114, + "height": null, + "hideTimeOverride": false, + "id": 55, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5402,38 +4725,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connections{ namespace=\"$ns\",
pod=~\"$pod\"}) by (client_software_name, client_software_version)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_software_name}} {{client_software_version}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections per client version", + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Connections", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Replica Fetch", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 15 }, - "id": 120, + "height": null, + "hideTimeOverride": false, + "id": 56, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": "Prometheus", - "description": "Number of consumer groups per group coordinator", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of groups managed by Broker", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5444,7 +4809,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5455,50 +4820,45 @@ "lineWidth": 1, "pointSize": 
5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 33 + "y": 15 }, - "id": 116, + "height": null, + "hideTimeOverride": false, + "id": 57, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5507,24 +4867,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{ namespace=\"$ns\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{namespace=\"$env\",pod=~\"$broker\"}", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer groups number per coordinator", + "title": "Number of Groups per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "Number of consumer group per state", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of stable groups managed by Broker", + "editable": true, + "error": false, "fieldConfig": { "defaults": 
{ "color": { @@ -5535,7 +4909,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5546,50 +4920,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 33 + "h": 10, + "w": 8, + "x": 8, + "y": 15 }, - "id": 118, + "height": null, + "hideTimeOverride": false, + "id": 58, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5598,63 +4969,136 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{ namespace=\"$ns\", pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "stable", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{ namespace=\"$ns\", pod=~\"$pod\"})", + "datasource": null, + "expr": 
"sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "preparing-rebalance", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "preparing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{ namespace=\"$ns\", pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "dead", - "refId": "C" + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{ namespace=\"$ns\", pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "completing-rebalance", - "refId": "D" + "intervalFactor": 2, + "legendFormat": "completing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{ namespace=\"$ns\", pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "empty", - "refId": "E" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Nb consumer groups per state", + "title": "Number of Groups per Broker per Status", + "transformations": [], 
+ "transparent": false, "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Group Coordinator", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 37 + "y": 16 }, - "id": 46, + "height": null, + "hideTimeOverride": false, + "id": 59, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": "Prometheus", - "description": "The number of messages produced converted to match the log.message.format.version.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of produce message conversions per second.\n This value increases when the broker receives produce messages from clients using older versions.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5665,7 +5109,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5676,77 +5120,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "opsps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 34 + "y": 
16 }, - "id": 48, + "height": null, + "hideTimeOverride": false, + "id": 60, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of produced message conversion", + "title": "Sum of Produce conversion rate per sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": "Prometheus", - "description": "The number of messages consumed converted at consumer to match the log.message.format.version.", + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of fetch message conversions per second.\n This value increases when the broker receives fetch messages from clients using older versions.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5757,7 +5209,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5768,255 +5220,292 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - 
"stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "opsps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 34 + "y": 16 }, - "id": 51, + "height": null, + "hideTimeOverride": false, + "id": 61, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace=\"$ns\",pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace=\"$env\",pod=~\"$broker\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of consumed message conversion", + "title": "Sum of Fetch conversion rate per sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": null, - "description": "Number of connection per client version", + "datasource": "${DS_PROMETHEUS}", + "description": "Sum of connections aggregated by client version and name.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, 
"custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } }, - "decimals": 0, "mappings": [], - "unit": "short" + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 34 + "y": 16 }, - "id": 96, + "height": null, + "hideTimeOverride": false, + "id": 62, "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", - "reduceOptions": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, "tooltip": { "mode": "single" } }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connections{ namespace=\"$ns\", pod=~\"$pod\"}) by (client_software_name, client_software_version) ", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connections{namespace=\"$env\",pod=~\"$broker\"}) by (client_software_name,client_software_version)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_software_name}} - {{client_software_version}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_software_name}} (v{{client_software_version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Client version repartition", - 
"type": "piechart" + "title": "Sum of Connections per version", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Message Conversion", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, - "datasource": "Prometheus", - "definition": "label_values(namespace)", - "description": null, - "error": null, + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], - "query": { - "query": "label_values(namespace)", - "refId": "Prometheus-namespace-Variable-Query" - }, + "query": "label_values(namespace)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, - "datasource": "Prometheus", - "definition": "label_values(kafka_server_kafkaserver_brokerstate{namespace=\"$ns\"}, pod)", - "description": null, - "error": null, + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, - "label": "Pod", + "label": "Broker", "multi": true, - "name": "pod", + "name": "broker", 
"options": [], - "query": { - "query": "label_values(kafka_server_kafkaserver_brokerstate{namespace=\"$ns\"}, pod)", - "refId": "Prometheus-pod-Variable-Query" - }, + "query": "label_values(kafka_server_replicamanager_leadercount{namespace=\"$env\"}, pod)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": "", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "0.95" - ], - "value": [ - "0.95" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, - "datasource": "Prometheus", - "definition": "label_values(quantile)", - "description": null, - "error": null, + "datasource": "${DS_PROMETHEUS}", "hide": 0, - "includeAll": true, - "label": "Percentile", - "multi": true, - "name": "percentile", + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", "options": [], - "query": { - "query": "label_values(quantile)", - "refId": "Prometheus-percentile-Variable-Query" - }, + "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\"}, quantile)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -6042,7 +5531,7 @@ ] }, "timezone": "browser", - "title": "Kafka cluster", - "uid": "qu-QZdfZz", - "version": 2 -} \ No newline at end of file + "title": "Kafka cluster - v2", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json 
index f0318d2a..c230452e 100644 --- a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json +++ b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json @@ -1,370 +1,547 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Monitor Apache Kafka Connect", + "description": "Overview of the Kafka Connect cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 5, - "iteration": 1632255569594, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 199, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "General", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Kafka Connect online workers returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ 
{ - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 212, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",version!=\"\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Total", + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 
3, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 213, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Running", + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "orange", - "value": 1 + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, 
"overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 215, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Paused", + "title": "Connect: Sum of Running Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "red", - "value": 1 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } 
+ }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 12, - "y": 1 + "y": 0 }, - "id": 214, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Failed", + "title": "Connect: Sum of Paused Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of Failed Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as no task should be failing.\n Alerting is recommended when this value is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "yellow", - "value": 1 + "color": "red", + "index": 1, 
+ "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 16, - "y": 1 + "y": 0 }, - "id": 216, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Unassigned", + "title": "Connect: Sum of Failed Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Informative value. 
Time since last rebalance.\n When this value is continuously and repeatedly low, it means some connectors are failing and rebalancing is triggered constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "purple", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "clockms" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 20, - "y": 1 + "y": 0 }, - "id": 217, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -372,316 +549,434 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$env\",app=\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Destroyed", + "title": "Connect: Time since last rebalance", "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + 
"columns": [], "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] + "align": "auto", + "displayMode": "auto", + "filterable": false }, - { - "matcher": { - "id": "byName", - "options": "running" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] + "thresholds": { + "mode": "absolute", + "steps": [] } - ] + }, + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 7, - "w": 12, + "h": 5, + "w": 24, "x": 0, - "y": 4 + "y": 1 }, - "id": 227, + "height": null, + "hideTimeOverride": false, + "id": 8, "interval": null, "links": [], - "maxDataPoints": 1, + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single" - } + "showHeader": true }, - "pluginVersion": "7.0.5", + "repeat": null, + "repeatDirection": null, + "span": 6, "targets": [ { - "expr": "sum (kafka_connect_connector_metrics{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status=\"running\"})", + "datasource": null, + "expr": "kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",start_time_ms!=\"\"}", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "running", - "refId": "B" + "intervalFactor": 2, + 
"legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum (kafka_connect_connector_metrics{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status=\"stopped\"})", + "datasource": null, + "expr": "kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",version!=\"\"}", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "stopped", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum (kafka_connect_connector_metrics{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status=\"paused\"})", + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "paused", - "refId": "C" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connector repartition per status", - "type": "piechart" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "destroyed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B877D9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "unassigned" - 
}, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FADE2A", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 4 - }, - "id": 219, - "interval": null, - "links": [], - "maxDataPoints": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "7.0.5", - "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "running", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "failed", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (pod) 
(kafka_connect_connect_worker_metrics_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "paused", - "refId": "C" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "unassigned", - "refId": "D" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "destroyed", - "refId": "E" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Task repartition per status", - "type": "piechart" - }, - { - "datasource": "Prometheus", - "description": "Status of connectors over time", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, + "title": "Connect Workers", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "pod" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "pod", + "app 
1", + "start_time_ms", + "version", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + "namespace 1" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "app 1": 1, + "namespace 1": 0, + "pod": 2, + "start_time_ms": 3, + "version": 4 + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + "app 1": "cluster", + "namespace 1": "environment", + "pod": "worker", + "start_time_ms": "start time", + "version": "version" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_info{namespace=\"$env\",app=\"$connect_cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connectors", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "connector" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + "Value #C", + "Value #D", + "Value #E" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": 
"", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -689,144 +984,88 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "percentunit" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/stopped.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/paused.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/running.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 6, + "h": 10, "w": 12, "x": 0, - "y": 11 + "y": 3 }, - "id": 228, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - 
"pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum (kafka_connect_connector_metrics{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status!=\"\"}) by (status) ", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_running_ratio{namespace=\"$env\",app=\"$connect_cluster\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{status}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Status of connectors", + "title": "Tasks Running Ratio", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Status of tasks over time", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -845,223 +1084,140 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "destroyed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "purple", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": 
"fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "running" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "unassigned" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 6, + "h": 10, "w": 12, "x": 12, - "y": 11 + "y": 3 }, - "id": 226, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "running", - "refId": "A" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "failed", - "refId": "B" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "paused", 
- "refId": "C" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "destroyed", - "refId": "D" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "unassigned", - "refId": "E" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Status of tasks", + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 4 }, - "id": 221, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Cores", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1072,50 +1228,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percent" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 18 + "y": 4 }, - "id": 223, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1124,34 +1275,49 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{namespace=\"$ns\",pod=~\"$pod\"}[5m])", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 
10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Memory", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1159,53 +1325,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 18 + "y": 4 }, - "id": 224, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1214,2030 +1375,168 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",pod=~\"$pod\"})", - "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" - }, - { - "expr": 
"jvm_memory_bytes_max{namespace=\"$ns\",pod=~\"$pod\",area=\"heap\"}", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "% time in GC", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "id": 225, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum 
without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\",pod=~\"$pod\"}[5m]))", - "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM GC time", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 97, - "panels": [ - { - "columns": [], - "datasource": "Prometheus", - "fontSize": "90%", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 146, - "pageSize": 100, - "showHeader": true, - "sort": { - "col": 7, - "desc": true - }, - "styles": [ - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "__name__", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "env", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "align": 
"auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "job", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "client_id", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Startup time", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "MMMM D, YYYY LT", - "decimals": 2, - "mappingType": 1, - "pattern": "start_time_ms", - "thresholds": [], - "type": "date", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #B", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #A", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Connector Count", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #C", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Connector Startup 
Success Total", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #D", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Connector Startup Failure Total", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #E", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Number of rebalances", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #F", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Average time of Rebalances", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #G", - "thresholds": [], - "type": "number", - "unit": "ms", - "valueMaps": [ - { - "text": "0", - "value": "NaN" - }, - { - "text": "N/A", - "value": "null" - } - ] - }, - { - "alias": "Time since last rebalance", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #H", - "thresholds": [], - "type": "number", - "unit": "ms" - }, - { - "alias": "Worker instance", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 
172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Number of tasks", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #I", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Task Startup Success ", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #J", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Task Startup Failure", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #K", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "right", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "kafka_connect_app_info{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\",start_time_ms!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - 
"interval": "", - "legendFormat": "", - "refId": "B" - }, - { - "expr": "kafka_connect_app_info{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\",version!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_count{namespace=\"$ns\",pod=~\"$pod\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace=\"$ns\",pod=~\"$pod\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace=\"$ns\",pod=~\"$pod\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "E" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_count{namespace=\"$ns\",pod=~\"$pod\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "I" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace=\"$ns\",pod=~\"$pod\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "J" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace=\"$ns\",pod=~\"$pod\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "K" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect Worker", - "transform": "table", - "transparent": true, - "type": "table-old" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Average number of network operations (reads or writes) on all connections 
per second", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 35 - }, - "hiddenSeries": false, - "id": 95, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_network_io_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network IO Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Bytes per second read off all sockets", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 35 - }, - "hiddenSeries": false, - "id": 91, - "legend": { - "avg": false, - "current": 
false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_incoming_byte_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Incoming Byte Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Average number of outgoing bytes sent per second to all servers", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 35 - }, - "hiddenSeries": false, - "id": 171, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - 
"pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Outgoing Byte Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Current number of active connections", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 169, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": 
"kafka_connect_connect_metrics_connection_count{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Current number of active connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Connections that failed authentication", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 42 - }, - "hiddenSeries": false, - "id": 170, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_failed_authentication_total{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": 
"{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Failed authentication connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Connections that were successfully authenticated using SASL or SSL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 42 - }, - "hiddenSeries": false, - "id": 174, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_successful_authentication_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Success authentication 
connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Average number of requests sent per second", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 49 - }, - "hiddenSeries": false, - "id": 172, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_request_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Average number of requests", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": 
"reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Responses received and sent per second", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 49 - }, - "hiddenSeries": false, - "id": 173, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_response_rate{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Responses received and sent", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - 
"aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Fraction of time the I/O thread spent doing I/O", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 49 - }, - "hiddenSeries": false, - "id": 93, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_io_ratio{namespace=\"$ns\",pod=~\"$pod\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "IO Ratio", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "Connect Worker", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 132, - "panels": [ - { - "columns": [], - "datasource": "Prometheus", - "fontSize": "110%", - "gridPos": { - "h": 11, - 
"w": 24, - "x": 0, - "y": 28 - }, - "id": 129, - "pageSize": 100, - "showHeader": true, - "sort": { - "col": 8, - "desc": true - }, - "styles": [ - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "__name__", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "class", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_class", - "preserveFormat": false, - "thresholds": [], - "type": "string", - "unit": "short", - "valueMaps": [] - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "env", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], 
- "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "job", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Nb of Tasks destroyed", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#B877D9", - "#B877D9" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "mappingType": 1, - "pattern": "Value #B", - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #A", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #C", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #D", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "#F2495C" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "status", - "thresholds": [ - "2" - ], - "type": "string", - "unit": "short", - "valueMaps": [ - { - "text": "running", - "value": "1" - }, - { - "text": "paused", - "value": "2" - }, - { - "text": "stopped", - "value": "3" - } - ] - }, - { - "alias": "name", - "align": 
"left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "type", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_type", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "version", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_version", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of tasks", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #E", - "thresholds": [ - "0", - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of Tasks running", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #F", - "thresholds": [ - "0", - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of Tasks failed", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#F2495C", - "#F2495C" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "mappingType": 1, - "pattern": "Value #G", - "preserveFormat": false, 
- "thresholds": [ - "1" - ], - "type": "number", - "unit": "short", - "valueMaps": [ - { - "text": "0", - "value": "null" - } - ] - }, - { - "alias": "Nb of Tasks paused", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#FF9830", - "#FF9830" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #H", - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short", - "valueMaps": [ - { - "text": "0", - "value": "null" - } - ] - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #I", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Nb of Tasks unassigned", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#FADE2A", - "#FADE2A" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #J", - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "right", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(label_replace(label_replace(kafka_connect_connector_info{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",status!=\"\"}, \"status\", \"1\", \"status\", \"running\"), \"status\", \"2\", \"status\", \"paused\"), \"status\", \"3\", \"status\", \"stopped\")", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "I" - }, - { - "expr": 
"kafka_connect_connector_info{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",connector_type!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "expr": "kafka_connect_connector_info{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",connector_version!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "expr": "kafka_connect_connector_info{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",connector_class!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "E" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "F" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "G" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "H" + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "expr": "sum by (connector) 
(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_unassigned_task_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "J" + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" ], - "timeFrom": null, - "timeShift": null, - "title": "Connectors", - "transform": "table", - "transformations": [], - "type": "table-old" + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Connector details", - "type": "row" + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": 
"timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 6 }, - "id": 234, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Rebalances average time", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3248,8 +1547,8 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "opacity", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, @@ -3259,52 +1558,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 24, + "h": 10, + "w": 8, "x": 0, - "y": 29 + "y": 6 }, - "id": 209, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3313,119 +1605,138 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, 
"targets": [ { - "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace=\"$ns\",pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_avg{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rebalances average time", + "title": "Batch Size (Avg.)", + "transformations": [], + "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Time since last rebalance", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - } - ] + "steps": [] }, - "unit": "clockms" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 24, - "x": 0, - "y": 37 + "h": 10, + "w": 8, + "x": 8, + "y": 6 }, - "id": 230, + "height": null, + "hideTimeOverride": false, + "id": 18, 
"interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", - "repeat": "instance", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$ns\",pod=~\"$pod\",job=\"connect\"} >= 0", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "($instance) Time since last rebalance ", - "type": "stat" - } - ], - "title": "Rebalances", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 112, - "panels": [ + "timeFrom": null, + "timeShift": null, + "title": "Batch Size (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average size of the batches processed by the connector", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3436,7 +1747,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + 
"fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3447,78 +1758,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "decbytes" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 30 + "y": 7 }, - "id": 113, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_batch_size_avg{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Batch Size Average", + "title": "Offset commit success %", + "transformations": [], + "transparent": false, "type": "timeseries" }, 
{ + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Maximum size of the batches processed by the connector", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3529,7 +1847,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3540,78 +1858,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "decbytes" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 30 + "h": 10, + "w": 8, + "x": 8, + "y": 7 }, - "id": 114, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": 
"{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Batch Size Max", + "title": "Offset commit avg. latency", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average percentage of the task’s offset commit attempts that succeeded", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3622,7 +1989,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3633,80 +2000,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - 
"unit": "percentunit" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 37 + "y": 8 }, - "id": 115, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_failures{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Offset commit success percentage", + "title": "Total Record Failures", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to commit offsets", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3717,7 +2089,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3728,78 +2100,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - 
"spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 37 + "h": 10, + "w": 8, + "x": 8, + "y": 8 }, - "id": 116, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_errors{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Offset commit Average Time", + "title": "Total Record Error", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The fraction of time this task has spent in the running state.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { "color": { @@ -3810,7 +2189,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3821,96 +2200,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percentunit" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 44 + "h": 10, + "w": 8, + "x": 16, + "y": 8 }, - "id": 117, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_running_ratio{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_records_skipped{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": 
null, - "title": "Running ratio", + "title": "Total Records Skipped", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Task metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 201, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of failures seen by task", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3921,7 +2289,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3932,80 +2300,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 31 + "y": 9 }, - "id": 203, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_record_failures{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + 
"datasource": null, + "expr": "kafka_connect_task_error_metrics_total_errors_logged{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record failures", + "title": "Total Errors Logged", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of errors seen by task", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4016,7 +2389,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4027,80 +2400,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 31 + "y": 9 }, - "id": 205, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + 
"displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_record_errors{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_retries{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record errors", + "title": "Total Retries", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of records skipped by task", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4111,7 +2489,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4122,80 +2500,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 16, - "y": 31 
+ "y": 9 }, - "id": 206, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_records_skipped{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record skipped", + "title": "Dead Letter Topic Requests", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Task Errors", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + 
"cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of messages that was logged into either the dead letter queue or with Log4j", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4206,7 +2631,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4217,80 +2642,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 38 + "y": 10 }, - "id": 208, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_errors_logged{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - 
"legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total errors logged", + "title": "Poll Batch Avg. Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of retries made by task", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4301,7 +2731,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4312,80 +2742,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 38 + "y": 10 }, - "id": 207, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": 
"kafka_connect_task_error_metrics_total_retries{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total retries", + "title": "Poll Batch Max. Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of produce requests to the dead letter queue", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4396,7 +2831,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4407,80 +2842,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 45 + "y": 11 }, - "id": 202, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": 
null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Dead letter queue Produce requests", + "title": "Source Record Poll Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of produce requests to the dead letter queue", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4491,7 +2931,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4502,95 +2942,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": 
null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 45 + "y": 11 }, - "id": 204, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Dead letter queue Produce requests", + "title": "Source Record Write Rate", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Task Errors metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Source Tasks", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, 
"x": 0, - "y": 31 + "y": 12 }, - "id": 139, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to poll for a batch of source records", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4601,7 +3073,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4612,79 +3084,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 32 + "y": 12 }, - "id": 140, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": 
"kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Average time", + "title": "Put Batch Avg. Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum time in milliseconds taken by this task to poll for a batch of source records", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4695,7 +3173,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4706,80 +3184,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 32 + "y": 12 }, - "id": 141, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", 
"placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Max time", + "title": "Put Batch Max. Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of records produced/polled (before transformation) by this task belonging to the named source connector in this worker.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4790,7 +3273,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4801,79 +3284,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "" 
}, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 39 + "y": 13 }, - "id": 144, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_partition_count{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Poll rate", + "title": "Partition Count", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Sink Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of records output from the transformations and written to Kafka for this task belonging to the named source connector in this worker. This is after transformations are applied and excludes any records filtered out by the transformations.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4884,7 +3415,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4895,79 +3426,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 39 + "x": 0, + "y": 5 }, - "id": 143, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": 
"kafka_connect_connect_metrics_incoming_byte_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Write rate", + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records that have been produced by this task but not yet completely written to Kafka.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4978,7 +3515,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4989,79 +3526,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 46 + "x": 8, + "y": 5 }, - "id": 142, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": 
"table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_active_count_avg{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Active Count average", + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum number of records that have been produced by this task but not yet completely written to Kafka.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5072,7 +3615,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5083,94 +3626,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + 
"h": 10, "w": 8, - "x": 8, - "y": 46 + "x": 0, + "y": 6 }, - "id": 145, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_active_count_max{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_io_ratio{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Active Count max", + "title": "IO Ratio", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Source metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 134, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of topic partitions assigned to this task belonging to the named sink connector in this worker.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5181,7 +3715,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5192,76 +3726,85 @@ "lineWidth": 1, "pointSize": 5, 
"scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 33 + "x": 8, + "y": 6 }, - "id": 135, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_partition_count{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_network_io_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Partition Count", + "title": "Network IO Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to put a batch of sinks records", + "description": null, + "editable": 
true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5272,7 +3815,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5283,76 +3826,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 33 + "x": 0, + "y": 7 }, - "id": 136, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_connection_count{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Average time", + 
"title": "Active Connections", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum time in milliseconds taken by this task to put a batch of sinks records", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5363,7 +3915,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5374,221 +3926,235 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 16, - "y": 33 + "x": 8, + "y": 7 }, - "id": 137, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$ns\",pod=~\"$pod\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_successful_authentication_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}} (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_failed_authentication_total{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Max time", + "title": "Authentications", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Sink metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connect Workers", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka-connect" + ], "templating": { "list": [ { - "allValue": ".+", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(ns)", - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], - "query": { - "query": "label_values(namespace)", - "refId": "Prometheus-ns-Variable-Query" - }, + "query": "label_values(namespace)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": 
null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": ".+", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_connect_cluster_id)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "kafka_connect_cluster_id", + "includeAll": false, + "label": "Connect cluster", + "multi": false, + "name": "connect_cluster", "options": [], - "query": { - "query": "label_values(kafka_connect_cluster_id)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\"}, app)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "All", - "value": "$__all" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_connect_app_info{namespace=\"$ns\"},pod)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Pod", - "multi": false, - "name": "pod", + "label": "Connect worker", + "multi": true, + "name": "connect_worker", "options": [], - "query": { - "query": "label_values(kafka_connect_app_info{namespace=\"$ns\"},pod)", - "refId": "Prometheus-pod-Variable-Query" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"}, pod)", "refresh": 1, - "regex": "", - "skipUrlSync": 
false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_connect_connector_task_metrics_pause_ratio{namespace=\"$ns\"},connector)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Connector name", + "label": "Connector", "multi": true, "name": "connector", "options": [], - "query": { - "query": "label_values(kafka_connect_connector_task_metrics_pause_ratio{namespace=\"$ns\"},connector)", - "refId": "Prometheus-connector-Variable-Query" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"}, connector)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -5611,8 +4177,8 @@ "30d" ] }, - "timezone": "", - "title": "Kafka Connect cluster", - "uid": "AEaSQ97mz", - "version": 1 -} \ No newline at end of file + "timezone": "browser", + "title": "Kafka Connect cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/cfk/kafka-consumer.json b/cfk-prometheus-grafana/grafana/kafka-consumer.json similarity index 100% rename from grafana-dashboards/cfk/kafka-consumer.json rename to cfk-prometheus-grafana/grafana/kafka-consumer.json diff --git a/grafana-dashboards/cfk/kafka-producer.json 
b/cfk-prometheus-grafana/grafana/kafka-producer.json similarity index 100% rename from grafana-dashboards/cfk/kafka-producer.json rename to cfk-prometheus-grafana/grafana/kafka-producer.json diff --git a/grafana-dashboards/cfk/kafka-quotas.json b/cfk-prometheus-grafana/grafana/kafka-quotas.json similarity index 100% rename from grafana-dashboards/cfk/kafka-quotas.json rename to cfk-prometheus-grafana/grafana/kafka-quotas.json diff --git a/cfk-prometheus-grafana/grafana/kafka-topics.json b/cfk-prometheus-grafana/grafana/kafka-topics.json index 9906db03..20e0198c 100644 --- a/cfk-prometheus-grafana/grafana/kafka-topics.json +++ b/cfk-prometheus-grafana/grafana/kafka-topics.json @@ -1,109 +1,71 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Kafka topics", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 4, - "iteration": 1647426704713, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 19, - "panels": [], - "title": "Overview", - "type": "row" - }, - { - "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1000 - }, - { - "color": "red", - "value": 10000 - } - ] + "steps": [] } - }, - "overrides": [] + } }, 
"gridPos": { - "h": 5, - "w": 4, + "h": 1, + "w": 24, "x": 0, - "y": 1 - }, - "id": 9, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" + "y": 0 }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{namespace=\"$ns\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Total # of Topics", - "type": "stat" + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -114,7 +76,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -125,50 +87,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { "h": 10, - "w": 13, - "x": 4, - "y": 1 + "w": 12, + "x": 0, + "y": 0 }, + "height": null, + 
"hideTimeOverride": false, "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -177,23 +136,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Messages In", + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -204,7 +178,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -215,32 +189,22 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, 
"unit": "bytes" }, @@ -248,17 +212,24 @@ }, "gridPos": { "h": 10, - "w": 7, - "x": 17, - "y": 1 + "w": 12, + "x": 12, + "y": 0 }, - "id": 17, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -267,82 +238,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum(kafka_log_log_size{namespace=\"$ns\",topic=~\"$topic\"}) by (topic))", + "datasource": null, + "expr": "topk(10, sum(kafka_log_log_size{namespace=\"$env\",topic=~\"$topic\"}) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Log size", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 10000 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 6 - }, - "id": 11, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_globalpartitioncount{namespace=\"$ns\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - 
"timeShift": null, - "title": "Total # of Partitions", - "type": "stat" - }, - { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -353,7 +280,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -364,13 +291,13 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -379,34 +306,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 12, "x": 0, - "y": 11 + "y": 1 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -415,23 +340,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": 
"", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes In", + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -442,7 +382,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -453,13 +393,13 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -468,34 +408,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 12, "x": 12, - "y": 11 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -504,24 +442,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",namespace=\"$ns\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", + "format": 
"time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes Out", + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -532,7 +484,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -543,50 +495,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 0, - "y": 20 + "y": 2 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -595,24 +544,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$ns\", topic=~\"$topic\"}[5m])) by (topic))", + "datasource": null, + "expr": 
"topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce Request per sec", + "title": "Produce Requests/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -623,7 +586,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -634,50 +597,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 12, - "y": 20 + "y": 2 }, - "id": 15, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -686,139 +646,127 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - 
"expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$ns\",topic=~\"$topic\"}[5m])) by (topic))", + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch Request per sec", + "title": "Consumer Fetch Requests/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 3 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { - "align": null, + "align": "auto", "displayMode": "auto", - "filterable": false + "filterable": true }, - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "partition" - }, - "properties": [ - { - "id": "custom.width", - "value": 103 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "offset" - }, - "properties": [ - { - "id": "custom.width", - "value": 137 - } - ] - }, - { - 
"matcher": { - "id": "byName", - "options": "instance" - }, - "properties": [ - { - "id": "custom.width", - "value": 155 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "topic" - }, - "properties": [ - { - "id": "custom.width", - "value": 294 - } - ] - } - ] + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 0, - "y": 29 + "y": 3 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "showHeader": true, - "sortBy": [ - { - "desc": false, - "displayName": "partition" - } - ] + "showHeader": true }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 6, "targets": [ { - "exemplar": true, - "expr": "kafka_log_log_logstartoffset{namespace=\"$ns\",topic=\"$topic\"}", + "datasource": null, + "expr": "kafka_log_log_logstartoffset{namespace=\"$env\",topic=~\"$topic\"}", "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Start Offset", + "title": "Start Offsets", "transformations": [ { "id": "organize", @@ -826,123 +774,132 @@ "excludeByName": { "Time": true, "__name__": true, - "env": true, - "instance": false, - "job": true + "app": true, + "clusterId": true, + "confluentPlatform": true, + "confluent_platform": true, + "controller_revision_hash": true, + "instance": true, + "job": true, + "namespace": true, + "platform_confluent_io_type": true, + "statefulset_kubernetes_io_pod_name": true, + "type": true }, "indexByName": { - "Time": 0, - "Value": 7, - "__name__": 1, - "env": 2, - "instance": 3, - "job": 4, - "partition": 6, - "topic": 5 + "Value": 4, + "partition": 3, + "pod": 1, + "topic": 2 }, "renameByName": { "Value": 
"offset" } } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "partition" + } + ], + "fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition" + } + ] + } } ], + "transparent": false, "type": "table" }, { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { - "align": null, + "align": "auto", "displayMode": "auto", - "filterable": false + "filterable": true }, - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "partition" - }, - "properties": [ - { - "id": "custom.width", - "value": 103 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "offset" - }, - "properties": [ - { - "id": "custom.width", - "value": 105 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "topic" - }, - "properties": [ - { - "id": "custom.width", - "value": 289 - } - ] - } - ] + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 12, - "y": 29 + "y": 3 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "showHeader": true, - "sortBy": [ - { - "desc": false, - "displayName": "partition" - } - ] + "showHeader": true }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 6, "targets": [ { - "exemplar": true, - "expr": 
"kafka_log_log_logendoffset{namespace=\"$ns\",topic=\"$topic\"}", + "datasource": null, + "expr": "kafka_log_log_logendoffset{namespace=\"$env\",topic=~\"$topic\"}", "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "End Offset", + "title": "End Offsets", "transformations": [ { "id": "organize", @@ -950,112 +907,179 @@ "excludeByName": { "Time": true, "__name__": true, - "env": true, - "instance": false, - "job": true + "app": true, + "clusterId": true, + "confluentPlatform": true, + "confluent_platform": true, + "controller_revision_hash": true, + "instance": true, + "job": true, + "namespace": true, + "platform_confluent_io_type": true, + "statefulset_kubernetes_io_pod_name": true, + "type": true }, "indexByName": { - "Time": 0, - "Value": 7, - "__name__": 1, - "env": 2, - "instance": 3, - "job": 4, - "partition": 6, - "topic": 5 + "Value": 4, + "partition": 3, + "pod": 1, + "topic": 2 }, "renameByName": { "Value": "offset" } } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "partition" + } + ], + "fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition" + } + ] + } } ], + "transparent": false, "type": "table" } ], - "title": "Topic offsets", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Offsets", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": 
[ + "confluent", + "kafka" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(namespace)", - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], - "query": { - "query": "label_values(namespace)", - "refId": "Prometheus-namespace-Variable-Query" - }, + "query": "label_values(namespace)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_log_log_size{namespace=\"$ns\"},topic)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Topic name", + "label": "Topic", "multi": true, "name": "topic", "options": [], - "query": { - "query": "label_values(kafka_log_log_size,topic)", - "refId": "Prometheus-topic-Variable-Query" - }, + "query": "label_values(kafka_log_log_size{namespace=\"$env\"}, topic)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", - "title": "Kafka Topics", - "uid": "vQT4b1-Mz", - "version": 1 -} \ No newline at end of file + 
"timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Kafka topics - v2", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json index 1c9cffef..054efed1 100644 --- a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json +++ b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json @@ -1,73 +1,87 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of ksqlDB clusters.", "editable": true, - "fiscalYearStartMonth": 0, - "graphTooltip": 0, - "id": 9, - "iteration": 1647515912982, + "gnetId": null, + "hideControls": false, + "id": null, "links": [], - "liveNow": false, "panels": [ { + "cacheTimeout": null, "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 29, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Overview", + 
"transformations": [], + "transparent": false, "type": "row" }, { - "description": "Average number of active queries per server.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "ksqlDB online instances returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -76,80 +90,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "exemplar": false, - "expr": "avg(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", - "instant": true, + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" 
+ "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Active Queries", + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online Servers", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of created queries", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of active queries deployed in the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 800 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -158,74 +173,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_running_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "format": 
"time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Running Queries", + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of persisted queries", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 1 + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -234,226 +264,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 - }, - "id": 2, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "expr": "avg(ksql_ksql_engine_query_stats_num_persistent_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", - "interval": "", - 
"legendFormat": "", - "refId": "A" - } - ], - "title": "Total Persisted Queries", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of rebalancing queries", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 1 - }, - "id": 16, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Rebalancing Queries", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Number of error query", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 2 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 1 + "y": 0 }, + "height": null, + "hideTimeOverride": false, "id": 4, + "interval": null, "links": [], 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Queries in Error State", + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of idle queries", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 
1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -462,183 +355,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 20, - "y": 1 + "x": 12, + "y": 0 }, - "id": 19, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(ksql_ksql_engine_query_stats_num_idle_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Idle Queries", + "timeFrom": null, + 
"timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": false - }, + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "ksql_query" - }, - "properties": [ - { - "id": "custom.width", - "value": 426 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "instance" - }, - "properties": [ - { - "id": "custom.width", - "value": 381 - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 0, - "y": 5 - }, - "id": 23, - "options": { - "footer": { - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "expr": "ksql_ksql_metrics_ksql_queries_query_status{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Queries Status", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": true, - "__name__": true, - "env": true, - "job": true, - "ksql_cluster": true - }, - "indexByName": {}, - "renameByName": { - "Time": "", - "__name__": "", 
- "instance": "", - "ksql_cluster": "", - "ksql_query": "" - } - } - } - ], - "type": "table" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of not running queries", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -647,123 +446,65 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 16, - "y": 5 - }, - "id": 5, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.4.2", - "targets": [ - { - "expr": "sum(ksql_ksql_engine_query_stats_not_running_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "title": "Stopped Queries", - "type": "stat" - }, - { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Num of running queries", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 5 + "y": 0 }, - "id": 15, + "height": null, + 
"hideTimeOverride": false, + "id": 6, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.4.2", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(ksql_ksql_engine_query_stats_pending_shutdown_queries{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"})", + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Currently Shutting Down Queries", + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Queries Failed", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Cluster liveness", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -774,7 +515,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -785,79 +526,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - 
"min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 5, + "h": 10, "w": 8, - "x": 16, - "y": 9 + "x": 0, + "y": 1 }, - "id": 17, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"}", + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$env\",app=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Cluster liveness", + "timeFrom": null, + "timeShift": null, + "title": "Cluster Liveness", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Message consumed/sec", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -868,7 +615,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -879,78 +626,85 @@ "lineWidth": 1, "pointSize": 5, 
"scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 14 + "h": 10, + "w": 8, + "x": 8, + "y": 1 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"}", + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace=\"$env\",app=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Message consumed/sec", + "timeFrom": null, + "timeShift": null, + "title": "Messages consumed/sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "Message produced/sec", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + 
"editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -961,7 +715,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -972,91 +726,126 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 14 + "h": 10, + "w": 8, + "x": 16, + "y": 1 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace=\"$ns\", ksql_cluster=\"$ksqldb_cluster_id\", pod=~\"$pod\"}[5m])", + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace=\"$env\",app=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Message produced/sec", + "timeFrom": null, + 
"timeShift": null, + "title": "Messages produced/sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 22 + "y": 2 }, - "id": 33, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1067,7 +856,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1078,77 +867,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 23 + "y": 2 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\", pod=~\"$pod\"}[5m])", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "CPU Usage", + "timeFrom": null, + "timeShift": null, + "title": "CPU 
usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1159,7 +956,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1170,83 +967,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "decbytes" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 23 + "y": 2 }, - "id": 24, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" }, "tooltip": { - "mode": "single", - "sort": "none" + "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\",pod=~\"$pod\"})", - "interval": "", - "legendFormat": "Used:{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{namespace=\"$ns\",area=\"heap\"}", + "datasource": null, + "expr": "sum 
without(area)(jvm_memory_bytes_used{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "Max:{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "JVM Memory Used", + "timeFrom": null, + "timeShift": null, + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1257,7 +1056,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1268,89 +1067,115 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 4, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 23 + "y": 2 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" }, "tooltip": { - "mode": 
"single", - "sort": "none" + "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\", pod=~\"$pod\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Time spent in GC", + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 3 }, - "id": 31, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1372,49 +1197,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": 
"red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 32 + "y": 3 }, - "id": 26, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1423,31 +1244,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" - }, - { - "refId": "C" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Poll Latency (Avg)", + "timeFrom": null, + "timeShift": null, + "title": "Poll Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1466,75 +1294,88 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, 
"thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 6, - "y": 32 - }, - "id": 35, + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Poll Latency (Max)", + "timeFrom": null, + "timeShift": null, + "title": "Poll Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1556,74 +1397,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - 
"stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 12, - "y": 32 + "h": 10, + "w": 8, + "x": 0, + "y": 4 }, - "id": 25, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Process Latency (Avg)", + "timeFrom": null, + "timeShift": null, + "title": "Process Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1642,75 +1494,88 @@ "viz": false }, 
"lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 18, - "y": 32 - }, - "id": 34, + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Process Latency Max", + "timeFrom": null, + "timeShift": null, + "title": "Process Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + 
"cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1732,74 +1597,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 43 + "y": 5 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Commit Latency (Avg)", + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" 
}, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1821,74 +1697,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 6, - "y": 43 - }, - "id": 38, + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Commit Latency (Max)", + "timeFrom": null, + 
"timeShift": null, + "title": "Commit Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1910,74 +1797,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 12, - "y": 43 + "h": 10, + "w": 8, + "x": 0, + "y": 6 }, - "id": 27, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + 
"metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Punctuate Latency (Avg)", + "timeFrom": null, + "timeShift": null, + "title": "Punctuate Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1996,108 +1894,130 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/max/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 5, - 2 - ], - "fill": "dash" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 18, - "y": 43 - }, - "id": 37, + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$ns\", 
thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Punctuate Latency (Max)", + "timeFrom": null, + "timeShift": null, + "title": "Punctuate Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Queries Performance", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 7 }, - "id": 40, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2116,53 +2036,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, 
"thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 33 + "y": 7 }, - "id": 36, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2171,25 +2086,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Put Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2208,52 +2136,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - 
"mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 33 + "h": 10, + "w": 8, + "x": 8, + "y": 7 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2262,25 +2186,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Put average latency", + "timeFrom": null, + "timeShift": null, + "title": "Put Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2299,52 +2236,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - 
"showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 33 + "h": 10, + "w": 8, + "x": 16, + "y": 7 }, - "id": 43, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2353,25 +2286,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Put max latency", + "timeFrom": null, + "timeShift": null, + "title": "Put Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2390,53 +2336,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 
1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 44 + "y": 8 }, - "id": 52, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2445,25 +2386,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Put if absent rate", + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": 
false, "fieldConfig": { "defaults": { "color": { @@ -2482,52 +2436,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 44 - }, - "id": 53, + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2536,25 +2486,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Put if absent average latency", + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - 
"description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2573,52 +2536,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 44 - }, - "id": 54, + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2627,25 +2586,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Put if absent max latency", + "timeFrom": null, + "timeShift": null, + "title": "Put if absent Latency (Max.)", "transformations": 
[], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2664,53 +2636,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 55 + "y": 9 }, - "id": 41, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2719,21 +2686,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } 
], + "timeFrom": null, + "timeShift": null, "title": "Fetch Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2752,52 +2736,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 55 - }, - "id": 44, + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2806,25 +2786,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Fetch 
average latency", + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2843,52 +2836,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 55 - }, - "id": 45, + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2897,25 +2886,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - 
"refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Fetch max latency", + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2934,53 +2936,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 66 + "y": 10 }, - "id": 46, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2989,25 +2986,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + 
"format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Delete Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3026,52 +3036,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 66 + "h": 10, + "w": 8, + "x": 8, + "y": 10 }, - "id": 47, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3080,25 +3086,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": 
"kafka_streams_stream_state_metrics_delete_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Delete average latency", + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3117,52 +3136,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 66 + "h": 10, + "w": 8, + "x": 16, + "y": 10 }, - "id": 48, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3171,25 +3186,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": 
"kafka_streams_stream_state_metrics_delete_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Delete max latency", + "timeFrom": null, + "timeShift": null, + "title": "Delete Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3208,53 +3236,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 77 + "y": 11 }, - "id": 49, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3263,25 +3286,38 @@ "mode": "single" } }, 
- "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$ns\", thread_id=~\".+$ksqldb_cluster_id.+\", pod=~\"$pod\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], + "timeFrom": null, + "timeShift": null, "title": "Restore Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3300,52 +3336,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 77 + "h": 10, + "w": 8, + "x": 8, + "y": 11 }, - "id": 50, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", 
"placement": "bottom" @@ -3354,25 +3386,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Restore average latency", + "timeFrom": null, + "timeShift": null, + "title": "Restore Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" - }, - "description": "", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3391,52 +3436,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 77 + "h": 10, + "w": 8, + "x": 16, + "y": 11 }, - "id": 51, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { 
"legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3445,128 +3486,174 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_latency_max{thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Restore max latency", + "timeFrom": null, + "timeShift": null, + "title": "Restore Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "StateStore Metric", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "State Stores", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 35, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "ksqldb" + ], "templating": { "list": [ { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "confluent", - "value": "confluent" - }, - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" + "tags": [], + "text": null, + "value": null }, - "definition": "label_values(namespace)", + "datasource": "Prometheus", "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], - "query": { - "query": "label_values(namespace)", - 
"refId": "Prometheus-ns-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "confluent.ksqldb_", - "value": "confluent.ksqldb_" - }, - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" + "tags": [], + "text": null, + "value": null }, - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", + "datasource": "Prometheus", "hide": 0, "includeAll": false, - "label": "Cluster ID", + "label": "ksqlDB cluster", "multi": false, - "name": "ksqldb_cluster_id", + "name": "ksqldb_cluster", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "refId": "Prometheus-ksqldb_cluster_id-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\"},app)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "All", - "value": "$__all" + "tags": [], + "text": null, + "value": null }, - "datasource": { - "type": "prometheus", - "uid": "a65Bu5Enk" + "datasource": "Prometheus", + "hide": 2, + "includeAll": false, + "label": "ksqlDB cluster ID", + "multi": false, + "name": "ksqldb_cluster_id", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\"},ksql_cluster)", + "refresh": 1, + "regex": null, + "sort": 1, 
+ "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null }, - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,pod)", + "datasource": "Prometheus", "hide": 0, "includeAll": true, - "label": "Pod", + "label": "ksqlDB server", "multi": true, - "name": "pod", + "name": "ksqldb_server", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,pod)", - "refId": "Prometheus-instance-Variable-Query" - }, + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"}, pod)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -3576,11 +3663,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", - "title": "ksqlDB cluster", - "uid": "pbx34foGk", - "version": 4, - "weekStart": "" -} \ No newline at end of file + "timezone": "browser", + "title": "ksqlDB cluster - v2", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json index bbdc9dd0..30f4c28e 100644 --- a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json +++ b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json @@ -1,686 +1,543 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + 
"pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Schema Registry cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 4, - "iteration": 1632254298743, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 19, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Schemas", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Schema Registry online instances returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "red", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { "color": "green", - "value": 1 + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": 
[] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, + "height": null, + "hideTimeOverride": false, "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(kafka_schema_registry_registered_count{namespace=\"$ns\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schema Registry Instances", + "title": "SR: Online instances", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Average number of registered schemas across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", 
"thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 4, - "y": 1 - }, - "id": 11, - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum by(schema_type) (kafka_schema_registry_schemas_created{namespace=\"$ns\"})", - "interval": "", - "legendFormat": "{{schema_type}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schema registered over time", - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 5, "w": 4, - "x": 16, - "y": 1 + "x": 4, + "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "7.3.4", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum 
by(schema_type)(kafka_schema_registry_schemas_created{namespace=\"$ns\"})", - "instant": true, + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{schema_type}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schemas created", - "type": "piechart" + "title": "SR: Registered Schemas (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", + "description": "Average number of schemas created, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 4, - "x": 20, - "y": 1 - }, - "id": 9, - "interval": null, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "7.3.4", - "targets": [ - { - "exemplar": true, - "expr": "avg by(schema_type)(kafka_schema_registry_schemas_deleted{namespace=\"$ns\"})", - "instant": true, - "interval": "", - "legendFormat": "{{schema_type}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schemas deleted", - "type": "piechart" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", 
- "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 8, + "y": 0 }, - "id": 25, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_schema_registry_registered_count{namespace=\"$ns\"})", - "instant": true, + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schemas registered", + "title": "SR: Created Schemas by Type (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 15, - "panels": [], - "title": "System", - "type": "row" - }, - { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": 
false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "percent" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 10 + "h": 5, + "w": 4, + "x": 12, + "y": 0 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(process_cpu_seconds_total{namespace=\"$ns\"}[5m])*100", + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", - "type": "timeseries" + "title": "SR: Sum of Deleted Schemas by Type", + 
"transformations": [], + "transparent": false, + "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], - "min": 0, + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "bytes" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 10 + "h": 5, + "w": 4, + "x": 16, + "y": 0 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$ns\"})", - "interval": "", - "legendFormat": 
"Used:{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{namespace=\"$ns\",area=\"heap\"}", + "datasource": null, + "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "Max:{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", - "type": "timeseries" + "title": "SR: Sum of Active Connections", + "transformations": [], + "transparent": false, + "type": "stat" }, { - "datasource": "Prometheus", - "description": "", + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 3, - "links": [], - "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 10 - }, - "id": 23, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "steps": [] + } } }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": 
"sum without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\"}[5m]))", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Time spent in GC", - "type": "timeseries" - }, - { - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 1 }, - "id": 17, - "title": "Connections", + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -691,7 +548,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -702,14 +559,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -717,34 +572,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 18 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], 
"displayMode": "table", "placement": "bottom" @@ -753,22 +606,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jetty_metrics_connections_active{namespace=\"$ns\"}", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$sr_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Active Connections", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -779,7 +648,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -790,14 +659,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -805,34 +672,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 18 + "y": 1 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - 
"lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -841,22 +706,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jersey_metrics_request_rate{namespace=\"$ns\"}", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$sr_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Requests Rate", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -867,7 +748,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -878,14 +759,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -893,34 +772,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 18 + "y": 1 }, - "id": 24, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { 
"legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -929,66 +806,132 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jersey_metrics_request_latency_99{namespace=\"$ns\"}", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$sr_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Requests latency 99p", + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "schema-registry" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(namespace)", - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], - "query": { - "query": "label_values(namespace)", - "refId": "Prometheus-ns-Variable-Query" + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], 
+ "text": null, + "value": null }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "sr_server", + "options": [], + "query": "label_values(kafka_schema_registry_registered_count{namespace=\"$env\"}, pod)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", - "title": "Schema Registry cluster", - "uid": "9ixzve-Mk", - "version": 2 -} \ No newline at end of file + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Schema Registry cluster - v2", + "uid": null, + "version": 0 +} diff --git a/cfk-prometheus-grafana/grafana/zookeeper-cluster.json b/cfk-prometheus-grafana/grafana/zookeeper-cluster.json index 676e1d46..3fa22d69 100644 --- a/cfk-prometheus-grafana/grafana/zookeeper-cluster.json +++ b/cfk-prometheus-grafana/grafana/zookeeper-cluster.json @@ -1,76 +1,103 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Zookeeper cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 5, - 
"iteration": 1632253434096, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 22, - "title": "Health Check", + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Quorum Size of Zookeeper ensemble", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 3 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" } ] }, @@ -79,78 +106,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, + "height": null, + "hideTimeOverride": false, "id": 2, "interval": null, "links": [], 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "count(zookeeper_status_quorumsize{namespace=\"$ns\"})", + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper nodes online", + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Alive Connections", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 - }, - { - "color": "#d44a3a", - "value": 200 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -159,163 +189,180 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + 
"id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_numaliveconnections{namespace=\"$ns\"})", + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Alive Connections", + "title": "ZK: ZNodes (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of queued requests in the server. 
This goes up when the server receives more requests than it can process", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line+area" - } - }, - "decimals": 0, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], - "min": 0, + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "transparent", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" }, { "color": "red", - "value": 10 + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 16, + "h": 5, + "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + 
"reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "zookeeper_outstandingrequests{namespace=\"$ns\"}", + "datasource": null, + "expr": "zookeeper_numaliveconnections{namespace=\"$env\"} / zookeeper_maxclientcnxnsperhost{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{pod}})", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Outstanding Requests", - "type": "timeseries" + "title": "ZK: Connections used", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -324,139 +371,230 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 12, + "y": 0 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", 
- "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$ns\"})", + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of ZNodes", + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Watchers", + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], 
"thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 1000 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 5 + "h": 5, + "w": 8, + "x": 16, + "y": 0 }, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 6, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "right" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$ns\"})", + "datasource": null, + "expr": "zookeeper_outstandingrequests{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of Watchers", - "type": "stat" + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + "description": null, + 
"editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 9 + "y": 1 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -467,7 +605,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -478,46 +616,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "percent" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 10 + "y": 1 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -526,23 +663,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - 
"expr": "irate(process_cpu_seconds_total{namespace=\"$ns\"}[5m])*100", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$zk_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -553,7 +705,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -564,47 +716,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 10 + "y": 1 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -613,29 +763,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum 
without(area)(jvm_memory_bytes_used{namespace=\"$ns\"})", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=\"$zk_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "Used:{{pod}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{namespace=\"$env\",area=\"heap\"}", - "interval": "", - "legendFormat": "Max:{{pod}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -646,7 +805,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -657,48 +816,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 3, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 16, - "y": 10 + "y": 1 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" 
@@ -707,329 +863,1028 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{namespace=\"$ns\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=\"$zk_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{pod}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Time spent in GC", + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, + "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 2 }, - "id": 18, - "title": "Request Latency", - "type": "row" - }, - { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + 
"log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_minrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Minimum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 
0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] }, - "decimals": 0, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 18 - }, - "id": 9, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_avgrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Average)", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ { - "expr": "zookeeper_minrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} 
({{pod}})", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_maxrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Maximum)", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Request Latency - Minimum", - "type": "timeseries" + "title": "Server Latency", + "transformations": [], + "transparent": 
false, + "type": "row" }, { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "links": [], - "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ms" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 18 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + "h": 1, + "w": 24, + "x": 0, + "y": 3 }, - "pluginVersion": "8.1.3", - "targets": [ + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "exemplar": true, - "expr": "zookeeper_avgrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{pod}})", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Latency - Average", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$env\",quantile=~\"$quantile\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": 
null, + "timeShift": null, + "title": "Kafka: Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" } }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sync Connections/sec", + "transformations": [], + "transparent": false, 
+ "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "ms" + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Expired Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - 
"y": 18 - }, - "id": 11, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Disconnected Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ { - "expr": 
"zookeeper_maxrequestlatency{namespace=\"$ns\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{pod}})", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Auth Failures on Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + 
"targets": [], "timeFrom": null, "timeShift": null, - "title": "Request Latency - Maximum", - "type": "timeseries" + "title": "Client Latency (Kafka)", + "transformations": [], + "transparent": false, + "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka", + "zookeeper" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(namespace)", - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": "Namespace", + "label": "Environment", "multi": false, - "name": "ns", + "name": "env", "options": [], - "query": { - "query": "label_values(namespace)", - "refId": "Prometheus-ns-Variable-Query" + "query": "label_values(namespace)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "zk_server", + "options": [], + "query": "label_values(zookeeper_outstandingrequests{namespace=\"$env\"}, pod)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": 
"Prometheus", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$env\"}, quantile)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -1039,10 +1894,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", - "title": "Zookeeper cluster", - "uid": "H4xS98vWk", - "version": 1 -} \ No newline at end of file + "timezone": "browser", + "title": "Zookeeper cluster - v2", + "uid": null, + "version": 0 +} diff --git a/grafana-dashboards/Makefile b/grafana-dashboards/Makefile index 0d542fd2..289c29b2 100644 --- a/grafana-dashboards/Makefile +++ b/grafana-dashboards/Makefile @@ -2,7 +2,7 @@ all: $(MAKE) def $(MAKE) cfk -def: OUTPUT_DIR=default +def: OUTPUT_DIR=../jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards def: export DATASOURCE=Prometheus def: export ENV_LABEL=env def: export SERVER_LABEL=hostname @@ -25,7 +25,7 @@ dashboards: @generate-dashboard kafka-consumer.py -o $(OUTPUT_DIR)/kafka-consumer.json @generate-dashboard kafka-quotas.py -o $(OUTPUT_DIR)/kafka-quotas.json -cfk: OUTPUT_DIR=cfk +cfk: OUTPUT_DIR=../cfk-prometheus-grafana/grafana cfk: export DATASOURCE=$${DS_PROMETHEUS} cfk: export ENV_LABEL=namespace cfk: export SERVER_LABEL=pod diff --git a/grafana-dashboards/cfk/confluent-platform.json b/grafana-dashboards/cfk/confluent-platform.json deleted file mode 100644 index d94f8840..00000000 --- a/grafana-dashboards/cfk/confluent-platform.json +++ /dev/null @@ -1,2681 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": 
"DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the main health-check metrics from Confluent Platform components.", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Zookeeper cluster", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 2.0, - "yaxis": "left" - }, - { - "color": "green", - "index": 2, - "line": true, - "op": "gt", - "value": 3.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": 
null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(zookeeper_status_quorumsize{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Quorum Size", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: ZNodes (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 0.6, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_numaliveconnections{namespace=\"$env\"} / zookeeper_maxclientcnxnsperhost{namespace=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Connections used", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Sum of watchers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources 
to cope with the number of requests.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 10.0, - "yaxis": "left" - } - ] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "last" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_outstandingrequests{namespace=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Outstanding Requests", - "transformations": [], - "transparent": false, - "type": 
"timeseries" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Kafka cluster", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Count of brokers available (online).\n This value is referential and should not be used for alerting.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Online Brokers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$env\"} > 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Active Controller", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of Topic partitions across the cluster.\n ", - "editable": true, - "error": false, - 
"fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 
1 minute), then it's recommended to alert.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Under-Replicated Partitions (URP)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n It's recommended alerting when this values is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Under-MinISR Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n It's recommended alerting when this values is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" 
- }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Offline Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Schema Registry cluster", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Schema Registry online instances returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": 
"none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "green", - "index": 2, - "line": true, - "op": "gt", - "value": 2.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_schema_registry_registered_count{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Online instances", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Average number of registered schemas across the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 2 - }, - "height": null, - 
"hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Registered Schemas (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Average number of schemas created, by type.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"avg(kafka_schema_registry_schemas_created{namespace=\"$env\"}) by (schema_type)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{schema_type}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Schemas Created by Type (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Average number of schemas deleted, by type.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$env\"}) by (schema_type)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{schema_type}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Schemas Deleted by Type (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - 
"collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Kafka Connect online workers returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\",app=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Online Workers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": 
"${DS_PROMETHEUS}", - "description": "Number of tasks deployed on Kafka Connect cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Total Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": 
"left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Running Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - 
"interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Paused Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - 
"calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Failed Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Informative value. Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$env\",app=~\"$connect_cluster\"} >= 0", - "format": 
"time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Time since last rebalance", - "transformations": [], - "transparent": false, - "type": "stat" - } - ], - "repeat": "connect_cluster", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Kafka Connect cluster: $connect_cluster", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "ksqlDB online instances returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - 
"textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Online instances", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of active queries deployed in the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum 
of Active Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Running Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a 
short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Rebalancing Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - 
"mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Queries Failed", - "transformations": [], - "transparent": false, - "type": "stat" - } - ], - "repeat": "ksqldb_cluster", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB cluster: $ksqldb_cluster", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka", - "zookeeper", - "kafka-connect", - "schema-registry", - "ksqldb" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": 
null, - "value": null - }, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(namespace)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "${DS_PROMETHEUS}", - "hide": true, - "includeAll": false, - "label": "Kafka Connect cluster", - "multi": false, - "name": "connect_cluster", - "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\"}, app)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "${DS_PROMETHEUS}", - "hide": true, - "includeAll": false, - "label": "ksqlDB cluster", - "multi": false, - "name": "ksqldb_cluster", - "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$env\"}, app)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Confluent Platform overview - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/cfk/kafka-cluster.json 
b/grafana-dashboards/cfk/kafka-cluster.json deleted file mode 100644 index 3db845de..00000000 --- a/grafana-dashboards/cfk/kafka-cluster.json +++ /dev/null @@ -1,5537 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka cluster", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Cluster Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Count of brokers available (online).\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": 
"area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_server_replicamanager_leadercount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Online Brokers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_controller_kafkacontroller_activecontrollercount{namespace=\"$env\"} > 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": 
"", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Active Controller", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "\n Number of partitions where the preferred replica is not the leader.\n Usually, this number is 0.\n Restarting nodes could cause this values to change, but when reassigning happens the value stabilize.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Preferred Replica Imbalance", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": 
"${DS_PROMETHEUS}", - "description": "Number of topics in the cluster.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Topics", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of requests per second rated over a 5 min. 
period.\n Gives an idea of the processing load in the cluster.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Rate of Requests/Sec", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of log sizes per broker.\n This must be compared with the total storage space available in the brokers.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - 
"gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_log_log_size{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Log Size", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of Topic partitions across the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - 
"datasource": null, - "expr": "sum(kafka_server_replicamanager_partitioncount{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"sum(kafka_server_replicamanager_underreplicatedpartitions{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Under-Replicated Partitions (URP)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n We recommend alerting when this values is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_cluster_partition_underminisr{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, 
- "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Under-MinISR Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n We recommend alerting when this values is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Offline Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - 
"cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of bytes in per second rated over a 5 min. period.\n Gives an idea of the incoming throughput handle by the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Bytes In/Sec", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of bytes out per second rated over a 5 min. 
period.\n Gives an idea of the outgoing throughput handle by the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Bytes Out/Sec", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - 
"timeFrom": null, - "timeShift": null, - "title": "System resources", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$broker\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - 
"type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of JVM memory used, without including areas (e.g. heap size).", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of seconds used by Garbage Collection.", - "editable": true, - "error": false, - "fieldConfig": 
{ - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], 
- "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of messages into topics per second, aggregated by sum without topic.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": 
"Number of bytes into topics per second, aggregated by sum without topic.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of bytes out of topics per second, aggregated by sum without topic.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { 
- "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Throughput", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - 
"mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Percent of time the network thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{namespace=\"$env\",pod=~\"$broker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - 
"refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network processor usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Percent of time the IO thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{namespace=\"$env\",pod=~\"$broker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": 
null, - "timeShift": null, - "title": "Request processor (IO) usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Thread utilization", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Requests per second rated over a 5 minutes period.\n Includes API call and version.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - 
"legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{namespace=\"$env\",pod=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{request}}(v{{version}})", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Requests rates", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Request Errors per second rated over a 5 minutes period.\n Includes API call and version.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - 
"last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{namespace=\"$env\",pod=~\"$broker\",error!=\"NONE\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{error}}@{{request}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Error rates", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request rates", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of connections count across cluster by brokers", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - 
"scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections alive per Broker", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of rate of connections created across cluster by brokers", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - 
"mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections creation rate per Broker", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of rate of connections closed across cluster by brokers", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - 
"x": 16, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$env\",pod=~\"$broker\"}) by (pod)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections close rate per Broker", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of connections count across cluster by listeners", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 32, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - 
"minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_count{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{listener}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections alive per Listener", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of rate of connections created across cluster by listener", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": 
{ - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{listener}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections creation rate per Listener", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of rate of connections closed across cluster by listener", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 34, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"sum(kafka_server_socketservermetrics_connection_close_rate{namespace=\"$env\",pod=~\"$broker\"}) by (listener)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{listener}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections close rate per Listener", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Connections", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 35, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Rate of ISR shrinks per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": 
[], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{namespace=\"$env\",pod=~\"$broker\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of ISR Shrinks/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Rate of ISR expands per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - 
"h": 10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 37, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "rate(kafka_server_replicamanager_isrexpandspersec{namespace=\"$env\",pod=~\"$broker\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of ISR Expands/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "In-Sync Replicas", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 38, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 39, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Request Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 40, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Local Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 41, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Remote Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - 
"showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 42, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Response Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode":
"off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 43, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Response Send Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request latency: Producer", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 44, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend on the request queue.\n Moved from network socket to 
request queue by Network threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 45, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Request Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { 
- "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 46, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Local Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": 
"auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 47, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Remote Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": 
{ - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 48, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Response Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", -
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 49, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Response Send Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request latency: Consumer Fetch", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 50, - "interval": null, - "links": [], - "maxDataPoints": 100, - 
"maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 51, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Request Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - 
"datasource": "${DS_PROMETHEUS}", - "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 52, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Local Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend waiting for coordination with other 
brokers/internal condition.\n At purgatory.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 53, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Remote Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": 
{ - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 14 - }, - "height": null, - "hideTimeOverride": false, - "id": 54, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Response Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": 
"", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 14 - }, - "height": null, - "hideTimeOverride": false, - "id": 55, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{namespace=\"$env\",pod=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Response Send Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request latency: Replica Fetch", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": 
{ - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "height": null, - "hideTimeOverride": false, - "id": 56, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of groups managed by Broker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 15 - }, - "height": null, - "hideTimeOverride": false, - "id": 57, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{namespace=\"$env\",pod=~\"$broker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Number of Groups per Broker", - 
"transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Number of stable groups managed by Broker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 15 - }, - "height": null, - "hideTimeOverride": false, - "id": 58, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "stable", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "preparing_rebalance", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "dead", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "completing_rebalance", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "empty", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Number of Groups per Broker per Status", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Group Coordinator", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 16 - }, - "height": null, - "hideTimeOverride": false, - "id": 59, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - 
"panels": [ - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of produce message conversions per second.\n This value increases when the broker receives produce messages from clients using older versions.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "opsps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 16 - }, - "height": null, - "hideTimeOverride": false, - "id": 60, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Produce conversion rate per sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": 
"${DS_PROMETHEUS}", - "description": "Sum of fetch message conversions per second.\n This value increases when the broker receives fetch messages from clients using older versions.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "opsps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 16 - }, - "height": null, - "hideTimeOverride": false, - "id": 61, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{namespace=\"$env\",pod=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Fetch conversion rate per sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": "Sum of connections aggregated by client 
version and name.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 16 - }, - "height": null, - "hideTimeOverride": false, - "id": 62, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connections{namespace=\"$env\",pod=~\"$broker\"}) by (client_software_name,client_software_version)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_software_name}} (v{{client_software_version}})", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections per version", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Message Conversion", - "transformations": [], - "transparent": false, - 
"type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(namespace)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": true, - "label": "Broker", - "multi": true, - "name": "broker", - "options": [], - "query": "label_values(kafka_server_replicamanager_leadercount{namespace=\"$env\"}, pod)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "${DS_PROMETHEUS}", - "hide": 0, - "includeAll": false, - "label": "Quantile", - "multi": false, - "name": "quantile", - "options": [], - "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{namespace=\"$env\"}, quantile)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - 
"1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka cluster - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/cfk/kafka-connect-cluster.json b/grafana-dashboards/cfk/kafka-connect-cluster.json deleted file mode 100644 index c230452e..00000000 --- a/grafana-dashboards/cfk/kafka-connect-cluster.json +++ /dev/null @@ -1,4184 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka Connect cluster", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Kafka Connect online workers returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - 
"overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",version!=\"\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Online Workers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of tasks deployed on Kafka Connect cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - 
"span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Total Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Running Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Paused Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - 
"description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Failed Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Informative value. 
Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{namespace=\"$env\",app=\"$connect_cluster\"} >= 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Time since last rebalance", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": false - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 
5, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",start_time_ms!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "kafka_connect_app_info{namespace=\"$env\",app=\"$connect_cluster\",version!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_success_total{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - 
"datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_success_total{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (pod) (kafka_connect_connect_worker_metrics_task_startup_failure_total{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect Workers", - "transformations": [ - { - "id": "seriesToColumns", - "options": { - "byField": "pod" - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "pod", - "app 1", - "start_time_ms", - "version", - "Value #C", - "Value #D", - "Value #E", - "Value #F", - "Value #G", - "Value #H", - "namespace 1" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": { - "app 1": 1, - "namespace 1": 0, - "pod": 2, - "start_time_ms": 3, - "version": 4 - }, - "renameByName": { - "Value #C": "connectors", - "Value #D": "conn. success", - "Value #E": "conn. 
failure", - "Value #F": "tasks", - "Value #G": "tasks success", - "Value #H": "tasks failure", - "app 1": "cluster", - "namespace 1": "environment", - "pod": "worker", - "start_time_ms": "start time", - "version": "version" - } - } - } - ], - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": false - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_info{namespace=\"$env\",app=\"$connect_cluster\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": 
"", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{namespace=\"$env\",app=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connectors", - "transformations": [ - { - "id": "seriesToColumns", - "options": { - "byField": "connector" - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "connector", - "Value #B", - "Value #C", - "Value #D", - "Value #E" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "renameByName": { - "Value #B": "tasks", - "Value #C": "running", - "Value #D": "failed", - "Value #E": "paused" - } - } - } - ], - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" 
- } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_running_ratio{namespace=\"$env\",app=\"$connect_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Running Ratio", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - 
"id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rebalance Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "System", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of seconds used by Garbage Collection.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - 
"cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_batch_size_avg{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Batch Size (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Batch Size (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": 
{ - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Offset commit success %", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Offset commit avg. 
latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Tasks", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": 
null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_record_failures{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Record Failures", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_connect_task_error_metrics_total_record_errors{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Record Error", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_records_skipped{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - 
"hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Records Skipped", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_errors_logged{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - 
], - "timeFrom": null, - "timeShift": null, - "title": "Total Errors Logged", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_retries{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Retries", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": 
"Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Dead Letter Topic Requests", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Task Errors", - "transformations": [], - "transparent": false, - "type": 
"row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - 
"interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Batch Avg. Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - 
"timeShift": null, - "title": "Poll Batch Max. Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Source Record Poll Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - 
"datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 32, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Source Record Write Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Source Tasks", - "transformations": [], - "transparent": false, - 
"type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 34, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - 
"interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Batch Avg. Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 35, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - 
"timeShift": null, - "title": "Put Batch Max. Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_sink_task_metrics_partition_count{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Partition Count", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": 
null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Sink Tasks", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 37, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 38, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_connect_connect_metrics_incoming_byte_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Incoming Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 39, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Outgoing Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 40, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_io_ratio{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO Ratio", - "transformations": [], - "transparent": false, - "type": "timeseries" - 
}, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 41, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_network_io_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network IO Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", 
- "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 42, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_connection_count{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Active Connections", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 43, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_successful_authentication_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} (success)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_failed_authentication_total{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} (failed)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Authentications", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Connect Workers", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - 
"sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka-connect" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(namespace)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Connect cluster", - "multi": false, - "name": "connect_cluster", - "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\"}, app)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Connect worker", - "multi": true, - "name": "connect_worker", - "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"}, pod)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - 
"hide": 0, - "includeAll": true, - "label": "Connector", - "multi": true, - "name": "connector", - "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{namespace=\"$env\",app=\"$connect_cluster\"}, connector)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka Connect cluster - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/cfk/kafka-topics.json b/grafana-dashboards/cfk/kafka-topics.json deleted file mode 100644 index 20e0198c..00000000 --- a/grafana-dashboards/cfk/kafka-topics.json +++ /dev/null @@ -1,1085 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka topics", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": 
null, - "title": "Throughput", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - 
"description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum(kafka_log_log_size{namespace=\"$env\",topic=~\"$topic\"}) by (topic))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Log size", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - 
"fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - 
"mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce Requests/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] 
- }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Consumer Fetch Requests/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": true - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - 
"interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ - { - "datasource": null, - "expr": "kafka_log_log_logstartoffset{namespace=\"$env\",topic=~\"$topic\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Start Offsets", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "__name__": true, - "app": true, - "clusterId": true, - "confluentPlatform": true, - "confluent_platform": true, - "controller_revision_hash": true, - "instance": true, - "job": true, - "namespace": true, - "platform_confluent_io_type": true, - "statefulset_kubernetes_io_pod_name": true, - "type": true - }, - "indexByName": { - "Value": 4, - "partition": 3, - "pod": 1, - "topic": 2 - }, - "renameByName": { - "Value": "offset" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "partition" - } - ], - "fields": {} - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "topic" - } - ] - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "partition" - } - ] - } - } - ], - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": true - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 
10, - "w": 12, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ - { - "datasource": null, - "expr": "kafka_log_log_logendoffset{namespace=\"$env\",topic=~\"$topic\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "End Offsets", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "__name__": true, - "app": true, - "clusterId": true, - "confluentPlatform": true, - "confluent_platform": true, - "controller_revision_hash": true, - "instance": true, - "job": true, - "namespace": true, - "platform_confluent_io_type": true, - "statefulset_kubernetes_io_pod_name": true, - "type": true - }, - "indexByName": { - "Value": 4, - "partition": 3, - "pod": 1, - "topic": 2 - }, - "renameByName": { - "Value": "offset" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "partition" - } - ], - "fields": {} - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "topic" - } - ] - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "partition" - } - ] - } - } - ], - "transparent": false, - "type": "table" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Offsets", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - 
"tags": [ - "confluent", - "kafka" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(namespace)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Topic", - "multi": true, - "name": "topic", - "options": [], - "query": "label_values(kafka_log_log_size{namespace=\"$env\"}, topic)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka topics - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/cfk/ksqldb-cluster.json b/grafana-dashboards/cfk/ksqldb-cluster.json deleted file mode 100644 index 054efed1..00000000 --- a/grafana-dashboards/cfk/ksqldb-cluster.json +++ /dev/null @@ -1,3683 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of ksqlDB clusters.", - "editable": 
true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "ksqlDB online instances returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Online Servers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of active queries deployed in the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Active Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", - 
"editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Running Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 
1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Rebalancing Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": 
"none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Queries Failed", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - 
"mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "ksql_ksql_engine_query_stats_liveness_indicator{namespace=\"$env\",app=\"$ksqldb_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Cluster Liveness", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - 
"links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{namespace=\"$env\",app=\"$ksqldb_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages consumed/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, 
- "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{namespace=\"$env\",app=\"$ksqldb_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages produced/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "System", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of seconds used by Garbage Collection.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - 
"cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": 
{ - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Process Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": 
"linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Process Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": 
"absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Commit Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - 
"id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Commit Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - 
], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Punctuate Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ 
- { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Punctuate Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Queries Performance", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": 
[] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], 
- "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" 
- }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put if absent Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": 
"time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put if absent Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 
10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put if absent Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - 
"cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 32, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" 
- }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 34, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - 
"steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 35, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 36, 
- "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Restore Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 37, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": 
"table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Restore Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 38, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": 
null, - "expr": "kafka_streams_stream_state_metrics_restore_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Restore Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "State Stores", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "ksqldb" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(namespace)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "ksqlDB cluster", - "multi": false, - "name": "ksqldb_cluster", - "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\"},app)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { 
- "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 2, - "includeAll": false, - "label": "ksqlDB cluster ID", - "multi": false, - "name": "ksqldb_cluster_id", - "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\"},ksql_cluster)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "ksqlDB server", - "multi": true, - "name": "ksqldb_server", - "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"}, pod)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "ksqlDB cluster - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/cfk/schema-registry-cluster.json b/grafana-dashboards/cfk/schema-registry-cluster.json deleted file mode 100644 index 30f4c28e..00000000 --- a/grafana-dashboards/cfk/schema-registry-cluster.json +++ /dev/null @@ -1,937 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" 
- } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Schema Registry cluster", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Schema Registry online instances returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "green", - "index": 2, - "line": true, - "op": "gt", - "value": 2.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - 
"values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_schema_registry_registered_count{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Online instances", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average number of registered schemas across the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(kafka_schema_registry_registered_count{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Registered Schemas (avg.)", - "transformations": [], - 
"transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average number of schemas created, by type.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(kafka_schema_registry_schemas_created{namespace=\"$env\"}) by (schema_type)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{schema_type}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Created Schemas by Type (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { 
- "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_schema_registry_schemas_deleted{namespace=\"$env\"}) by (schema_type)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{schema_type}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Sum of Deleted Schemas by Type", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Sum of Active Connections", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "System", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - 
"hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$sr_server\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { 
- "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$sr_server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$sr_server\"}[5m]))", - "format": "time_series", 
- "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "schema-registry" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(namespace)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Server", - "multi": true, - "name": "sr_server", - "options": [], - "query": "label_values(kafka_schema_registry_registered_count{namespace=\"$env\"}, pod)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Schema Registry cluster - v2", - "uid": null, - "version": 0 -} diff --git 
a/grafana-dashboards/cfk/zookeeper-cluster.json b/grafana-dashboards/cfk/zookeeper-cluster.json deleted file mode 100644 index 3fa22d69..00000000 --- a/grafana-dashboards/cfk/zookeeper-cluster.json +++ /dev/null @@ -1,1914 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Zookeeper cluster", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 2.0, - "yaxis": "left" - }, - { - "color": "green", - "index": 2, - "line": true, - "op": "gt", - "value": 3.0, - "yaxis": "left" - } - ] - }, - 
"unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(zookeeper_status_quorumsize{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Quorum Size", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": 
null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(zookeeper_inmemorydatatree_nodecount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: ZNodes (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 0.6, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - 
"datasource": null, - "expr": "zookeeper_numaliveconnections{namespace=\"$env\"} / zookeeper_maxclientcnxnsperhost{namespace=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Connections used", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(zookeeper_inmemorydatatree_watchcount{namespace=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Sum of watchers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": 
"Prometheus", - "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 10.0, - "yaxis": "left" - } - ] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "last" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_outstandingrequests{namespace=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} 
({{server_id}}:{{member_type}})", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Outstanding Requests", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "System", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - 
"max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$zk_server\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum 
without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=\"$zk_server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=\"$zk_server\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - 
"step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_minrequestlatency{namespace=\"$env\"} * 
zookeeper_ticktime", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Request Latency (Minimum)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_avgrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - 
"title": "ZK: Request Latency (Average)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_maxrequestlatency{namespace=\"$env\"} * zookeeper_ticktime", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Request Latency (Maximum)", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Server Latency", 
- "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$env\",quantile=~\"$quantile\"}", - "format": "time_series", - "hide": false, - "instant": false, 
- "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Request Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{namespace=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sync Connections/sec", - "transformations": 
[], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{namespace=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Expired Connections/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{namespace=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Disconnected Connections/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - 
"viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{namespace=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Auth Failures on Connections/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Client Latency (Kafka)", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka", - "zookeeper" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 
0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(namespace)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Server", - "multi": true, - "name": "zk_server", - "options": [], - "query": "label_values(zookeeper_outstandingrequests{namespace=\"$env\"}, pod)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Quantile", - "multi": false, - "name": "quantile", - "options": [], - "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{namespace=\"$env\"}, quantile)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Zookeeper cluster - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/confluent-platform.json b/grafana-dashboards/default/confluent-platform.json deleted file mode 100644 index 4e89ac2d..00000000 --- 
a/grafana-dashboards/default/confluent-platform.json +++ /dev/null @@ -1,2681 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the main health-check metrics from Confluent Platform components.", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Zookeeper cluster", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 2.0, - "yaxis": "left" - }, - { - "color": "green", - "index": 2, - "line": true, - "op": "gt", - "value": 3.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 
- }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(zookeeper_status_quorumsize{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Quorum Size", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"avg(zookeeper_inmemorydatatree_nodecount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: ZNodes (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 0.6, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 0.8, - "yaxis": "left" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_numaliveconnections{env=\"$env\"} / 
zookeeper_maxclientcnxnsperhost{env=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Connections used", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(zookeeper_inmemorydatatree_watchcount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Sum of watchers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of requests waiting for processing (queued).\n If the number of 
outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 10.0, - "yaxis": "left" - } - ] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "last" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_outstandingrequests{env=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": 
null, - "timeShift": null, - "title": "ZK: Outstanding Requests", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Kafka cluster", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Count of brokers available (online).\n This value is referential and should not be used for alerting.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_server_replicamanager_leadercount{env=\"$env\"})", - 
"format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Online Brokers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_controller_kafkacontroller_activecontrollercount{env=\"$env\"} > 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Active Controller", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of 
Topic partitions across the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_replicamanager_partitioncount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 
1 minute), then it's recommended to alert.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Under-Replicated Partitions (URP)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n It's recommended alerting when this values is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_cluster_partition_underminisr{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Under-MinISR Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n It's recommended alerting when this values is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - 
"overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Offline Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Schema Registry cluster", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Schema Registry online instances returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "green", - "index": 2, - "line": true, - "op": "gt", - "value": 2.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_schema_registry_registered_count{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Online instances", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average number of registered schemas across the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - 
"id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Registered Schemas (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average number of schemas created, by type.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(kafka_schema_registry_schemas_created{env=\"$env\"}) by (schema_type)", - "format": 
"time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{schema_type}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Schemas Created by Type (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average number of schemas deleted, by type.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_schema_registry_schemas_deleted{env=\"$env\"}) by (schema_type)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{schema_type}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Schemas Deleted by Type (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - 
"fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Kafka Connect online workers returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Online Workers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of tasks deployed on Kafka Connect cluster.\n ", - "editable": true, 
- "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Total Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": 
"left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Running Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - 
"options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Paused Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - 
"repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Failed Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Informative value. Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"} >= 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "{{pod}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Time since last rebalance", - "transformations": [], - "transparent": false, - "type": "stat" - } - ], - "repeat": "connect_cluster", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Kafka Connect cluster: $connect_cluster", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "ksqlDB online instances returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - 
"targets": [ - { - "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Online instances", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of active queries deployed in the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Active Queries", - "transformations": [], - "transparent": false, - 
"type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Running Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 
1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Rebalancing Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - 
"noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Queries Failed", - "transformations": [], - "transparent": false, - "type": "stat" - } - ], - "repeat": "ksqldb_cluster", - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB cluster: $ksqldb_cluster", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka", - "zookeeper", - "kafka-connect", - "schema-registry", - "ksqldb" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - 
"value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": true, - "includeAll": false, - "label": "Kafka Connect cluster", - "multi": false, - "name": "connect_cluster", - "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\"}, kafka_connect_cluster_id)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": true, - "includeAll": false, - "label": "ksqlDB cluster", - "multi": false, - "name": "ksqldb_cluster", - "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{env=\"$env\"}, ksqldb_cluster_id)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Confluent Platform overview - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/kafka-cluster.json 
b/grafana-dashboards/default/kafka-cluster.json deleted file mode 100644 index 1c7f6dbe..00000000 --- a/grafana-dashboards/default/kafka-cluster.json +++ /dev/null @@ -1,5537 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka cluster", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Cluster Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Count of brokers available (online).\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - 
"graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_server_replicamanager_leadercount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Online Brokers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Active Controller broker.\n It should always be 1. If the value is different than 1, then it must be alerted for troubleshooting.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_controller_kafkacontroller_activecontrollercount{env=\"$env\"} > 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", 
- "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Active Controller", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "\n Number of partitions where the preferred replica is not the leader.\n Usually, this number is 0.\n Restarting nodes could cause this values to change, but when reassigning happens the value stabilize.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Preferred Replica Imbalance", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - 
"description": "Number of topics in the cluster.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Topics", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of requests per second rated over a 5 min. 
period.\n Gives an idea of the processing load in the cluster.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Rate of Requests/Sec", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of log sizes per broker.\n This must be compared with the total storage space available in the brokers.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - 
"h": 5, - "w": 4, - "x": 20, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "value_and_name" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_log_log_size{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Log Size", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of Topic partitions across the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": 
null, - "expr": "sum(kafka_server_replicamanager_partitioncount{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"sum(kafka_server_replicamanager_underreplicatedpartitions{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Under-Replicated Partitions (URP)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n We recommend alerting when this values is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_cluster_partition_underminisr{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Under-MinISR Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n We recommend alerting when this values is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sum of Offline Partitions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - 
"cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of bytes in per second rated over a 5 min. period.\n Gives an idea of the incoming throughput handle by the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Bytes In/Sec", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of bytes out per second rated over a 5 min. 
period.\n Gives an idea of the outgoing throughput handle by the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Bytes Out/Sec", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - 
"timeFrom": null, - "timeShift": null, - "title": "System resources", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$broker\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - 
"type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of JVM memory used, without including areas (e.g. heap size).", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of seconds used by Garbage Collection.", - "editable": true, - "error": false, - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], 
- "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of messages into topics per second, aggregated by sum without topic.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number 
of bytes into topics per second, aggregated by sum without topic.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of bytes out of topics per second, aggregated by sum without topic.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Throughput", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - 
"mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Percent of time the network thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{env=\"$env\",hostname=~\"$broker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - 
"refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network processor usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Percent of time the IO thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{env=\"$env\",hostname=~\"$broker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": 
null, - "timeShift": null, - "title": "Request processor (IO) usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Thread utilization", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Requests per second rated over a 5 minutes period.\n Includes API call and version.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - 
"calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{request}}(v{{version}})", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Requests rates", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Request Errors per second rated over a 5 minutes period.\n Includes API call and version.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - 
"displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{env=\"$env\",hostname=~\"$broker\",error!=\"NONE\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{error}}@{{request}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Error rates", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request rates", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of connections count across cluster by brokers", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - 
"log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_count{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections alive per Broker", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of rate of connections created across cluster by brokers", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - 
"thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections creation rate per Broker", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of rate of connections closed across cluster by brokers", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 6 
- }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections close rate per Broker", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of connections count across cluster by listeners", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 32, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - 
"options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_count{env=\"$env\",hostname=~\"$broker\"}) by (listener)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{listener}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections alive per Listener", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of rate of connections created across cluster by listener", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } 
- }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{env=\"$env\",hostname=~\"$broker\"}) by (listener)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{listener}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections creation rate per Listener", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of rate of connections closed across cluster by listener", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 34, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"sum(kafka_server_socketservermetrics_connection_close_rate{env=\"$env\",hostname=~\"$broker\"}) by (listener)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{listener}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections close rate per Listener", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Connections", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 35, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Rate of ISR shrinks per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - 
"thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{env=\"$env\",hostname=~\"$broker\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of ISR Shrinks/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Rate of ISR expands per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 
10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 37, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "rate(kafka_server_replicamanager_isrexpandspersec{env=\"$env\",hostname=~\"$broker\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rate of ISR Expands/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "In-Sync Replicas", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 38, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time spent on the request queue.\n Moved from network socket to request queue by Network threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 39, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Request Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 40, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Local Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 41, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Remote Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": 
"auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 42, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Response Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - 
"mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 43, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce: Response Send Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request latency: Producer", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 44, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend on the request queue.\n Moved from network socket to request queue by 
Network threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 45, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Request Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time spent doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - 
"color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 46, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Local Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 47, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Remote Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - 
"legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 48, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Response Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - 
"lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 49, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Response Send Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request latency: Consumer Fetch", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 50, - "interval": null, - "links": [], - "maxDataPoints": 100, - 
"maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 51, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Request Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - 
"datasource": "Prometheus", - "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 52, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Local Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend waiting for coordination with other 
brokers/internal condition.\n At purgatory.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 53, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Remote Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { 
- "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 14 - }, - "height": null, - "hideTimeOverride": false, - "id": 54, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Response Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time expend delivering response.\n Moved from response queue to client by Networkc threads.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", 
- "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 14 - }, - "height": null, - "hideTimeOverride": false, - "id": 55, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{quantile}}th)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch: Response Send Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Request latency: Replica Fetch", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": 
{ - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 15 - }, - "height": null, - "hideTimeOverride": false, - "id": 56, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of groups managed by Broker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 15 - }, - "height": null, - "hideTimeOverride": false, - "id": 57, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{env=\"$env\",hostname=~\"$broker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Number of Groups per Broker", - 
"transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of stable groups managed by Broker", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 15 - }, - "height": null, - "hideTimeOverride": false, - "id": 58, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "stable", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "preparing_rebalance", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "dead", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "completing_rebalance", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "empty", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Number of Groups per Broker per Status", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Group Coordinator", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 16 - }, - "height": null, - "hideTimeOverride": false, - "id": 59, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - 
"panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of produce message conversions per second.\n This value increases when the broker receives produce messages from clients using older versions.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "opsps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 16 - }, - "height": null, - "hideTimeOverride": false, - "id": 60, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Produce conversion rate per sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - 
"description": "Sum of fetch message conversions per second.\n This value increases when the broker receives fetch messages from clients using older versions.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "opsps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 16 - }, - "height": null, - "hideTimeOverride": false, - "id": 61, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{env=\"$env\",hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Fetch conversion rate per sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of connections aggregated by client version and name.\n ", 
- "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 16 - }, - "height": null, - "hideTimeOverride": false, - "id": 62, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_server_socketservermetrics_connections{env=\"$env\",hostname=~\"$broker\"}) by (client_software_name,client_software_version)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_software_name}} (v{{client_software_version}})", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sum of Connections per version", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Message Conversion", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - 
"refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Broker", - "multi": true, - "name": "broker", - "options": [], - "query": "label_values(kafka_server_replicamanager_leadercount{env=\"$env\"}, hostname)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Quantile", - "multi": false, - "name": "quantile", - "options": [], - "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{env=\"$env\"}, quantile)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - 
"6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka cluster - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/kafka-connect-cluster.json b/grafana-dashboards/default/kafka-connect-cluster.json deleted file mode 100644 index d2e12193..00000000 --- a/grafana-dashboards/default/kafka-connect-cluster.json +++ /dev/null @@ -1,4184 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka Connect cluster", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Kafka Connect online workers returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - 
"w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",version!=\"\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Online Workers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of tasks deployed on Kafka Connect cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - 
"datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Total Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Running Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Paused Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": 
"Prometheus", - "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Failed Tasks", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Informative value. 
Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 20, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"} >= 0", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Time since last rebalance", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": false - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - 
"gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",start_time_ms!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",version!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_startup_success_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_startup_success_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_startup_failure_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect Workers", - "transformations": [ - { - "id": "seriesToColumns", - "options": { - "byField": "hostname" - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "hostname", - "kafka_connect_cluster_id 1", - "start_time_ms", - "version", - "Value #C", - "Value #D", - "Value #E", - "Value #F", - "Value #G", - "Value #H", - "env 1" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": { - "env 1": 0, - "hostname": 2, - "kafka_connect_cluster_id 1": 1, - "start_time_ms": 3, - "version": 4 - }, - "renameByName": { - "Value #C": "connectors", - "Value #D": "conn. success", - "Value #E": "conn. 
failure", - "Value #F": "tasks", - "Value #G": "tasks success", - "Value #H": "tasks failure", - "env 1": "environment", - "hostname": "worker", - "kafka_connect_cluster_id 1": "cluster", - "start_time_ms": "start time", - "version": "version" - } - } - } - ], - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": false - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 5, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - 
"interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connectors", - "transformations": [ - { - "id": "seriesToColumns", - "options": { - "byField": "connector" - } - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "connector", - "Value #B", - "Value #C", - "Value #D", - "Value #E" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "renameByName": { - "Value #B": "tasks", - "Value #C": "running", - "Value #D": "failed", - "Value #E": "paused" - } - } - } - ], - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, 
- "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_running_ratio{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Running Ratio", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": 
[] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rebalance Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "System", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of seconds used by Garbage Collection.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - 
"axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": 
null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_batch_size_avg{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Batch Size (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_batch_size_max{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Batch Size (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Offset commit success %", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false 
- }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Offset commit avg. 
latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Tasks", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": 
null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_record_failures{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Record Failures", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_connect_task_error_metrics_total_record_errors{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Record Error", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_connect_task_error_metrics_total_records_skipped{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Records Skipped", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_connect_task_error_metrics_total_errors_logged{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Errors Logged", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_task_error_metrics_total_retries{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - 
"format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total Retries", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": 
"", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Dead Letter Topic Requests", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Task Errors", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": 
"table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Batch Avg. Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ 
- { - "datasource": null, - "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Batch Max. Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_connect_source_task_metrics_source_record_poll_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Source Record Poll Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 32, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_connect_source_task_metrics_source_record_write_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Source Record Write Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Source Tasks", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" 
- }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 34, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Batch Avg. Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 35, - "interval": 
null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Batch Max. Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - 
"displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_sink_task_metrics_partition_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Partition Count", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Sink Tasks", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 37, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": 
"auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 38, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_incoming_byte_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Incoming Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - 
"overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 39, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Outgoing Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 40, - "interval": null, - "links": [], - "maxDataPoints": 100, 
- "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_io_ratio{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO Ratio", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 41, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": 
null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_network_io_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network IO Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 42, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_connect_connect_metrics_connection_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Active Connections", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 43, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_successful_authentication_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": 
false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} (success)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "kafka_connect_connect_metrics_failed_authentication_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} (failed)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Authentications", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Connect Workers", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka-connect" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Connect cluster", - "multi": false, - "name": "connect_cluster", - "options": [], - "query": 
"label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\"}, kafka_connect_cluster_id)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Connect worker", - "multi": true, - "name": "connect_worker", - "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}, hostname)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Connector", - "multi": true, - "name": "connector", - "options": [], - "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}, connector)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka Connect cluster - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/kafka-consumer.json 
b/grafana-dashboards/default/kafka-consumer.json deleted file mode 100644 index 3aef1c22..00000000 --- a/grafana-dashboards/default/kafka-consumer.json +++ /dev/null @@ -1,4582 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka consumers", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - 
"orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Consumed Rate", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", - "format": "time_series", - 
"hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Records Lag", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 10.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": 
"Rebalance Rate per hour", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Failed Rebalance Rate per hour", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - 
"thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_consumer_app_info{env=\"$env\", client_id=~\"$client_id\", version!=\"\", hostname=~\"$server\"}) by (version)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{version}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Versions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": 
"line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Consumed Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - 
"pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cts" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Records Consumed Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - 
"mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Records Lag Max", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - 
"hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - 
"displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - 
"hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Size", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - 
"mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Throttle Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Performance", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } 
- }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Commit Rate", - "transformations": [], - "transparent": false, - 
"type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Join Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sync Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - 
"tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Commit Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Join Time", - "transformations": [], - "transparent": false, - "type": 
"timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - 
"metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Sync Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Heartbeat Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, 
- { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Heartbeat Response Time (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Last Heartbeat Seconds Ago", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (failed)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rebalance Rate Per Hour", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - 
"fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": 
"Rebalance Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Assigned Partitions", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": 
null, - "timeShift": null, - "title": "Consumer group", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\", 
hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connection Count", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": 
"", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connection Creation Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connection Close Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { 
- "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 32, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO ratio", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - 
"axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO wait ratio", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", 
- "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 34, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Select Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": 
[], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ns" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 35, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO time avg.", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ns" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - 
"id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO wait time avg.", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Connections", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 37, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false 
- }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 38, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Incoming Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - 
"stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 39, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Outgoing Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - 
"gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 40, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - 
"showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 41, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": 
"reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 42, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Response Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Per Broker", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 13 - }, - "height": null, - "hideTimeOverride": false, - "id": 43, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 44, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Consumed Rate per Topic", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 45, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Records Consumed Rate per Topic", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - 
"spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 46, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Size per Topic", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": 
"none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 47, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Records per Request Avg. 
per Topic", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Per Topic", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka-client", - "kafka-consumer" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Server", - "multi": true, - "name": "server", - "options": [], - "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\"},hostname)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Client ID", - "multi": true, - "name": "client_id", - "options": [], - "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{env=\"$env\", 
client_type=\"consumer\"},client_id)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka Consumer - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/kafka-producer.json b/grafana-dashboards/default/kafka-producer.json deleted file mode 100644 index 17359359..00000000 --- a/grafana-dashboards/default/kafka-producer.json +++ /dev/null @@ -1,4087 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka producers", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": 
false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Send Rate", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 
4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Error Rate", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 10.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - 
"orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Retry Rate", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_producer_app_info{env=\"$env\", client_id=~\"$client_id\", version!=\"\", hostname=~\"$server\"}) by (version)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - 
"legendFormat": "{{version}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Versions", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"topk(10,kafka_producer_producer_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Incoming Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", 
- "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Outgoing Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Metadata Age", - 
"transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request in-flight", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": 
"none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Records per Request (avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": 
"linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Send Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": 
"cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Retry Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 
100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Error Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" 
- } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Size", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": 
null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Queue Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 
10, - "w": 8, - "x": 16, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce Throttle Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - 
"stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Batch Size", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Batch Split Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": 
{ - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Compression Rate (avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Performance", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connection Count", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connection Creation Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" 
- }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connection Close Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - 
"unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO ratio", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": 
null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO wait ratio", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - 
"repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Select Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ns" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - 
"format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "IO time avg.", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ns" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } 
- ], - "timeFrom": null, - "timeShift": null, - "title": "IO wait time avg.", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Connections", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 32, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - 
"mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Incoming Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Outgoing Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 34, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": 
false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (avg.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - }, - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (max.)", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 35, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ 
- { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": 
false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Response Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Per Broker", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 37, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 38, - "interval": null, - "links": [], - "maxDataPoints": 
100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 39, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": 
"single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Compression Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 40, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"topk(10,kafka_producer_producer_topic_metrics_record_send_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Send Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 41, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": 
false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record Retry Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 42, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": 
null, - "timeShift": null, - "title": "Record Error Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Per Topic", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka-client", - "kafka-producer" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Server", - "multi": true, - "name": "server", - "options": [], - "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\"},hostname)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Client ID", - "multi": true, - "name": "client_id", - "options": [], - "query": 
"label_values(kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\"},client_id)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka Producer - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/kafka-quotas.json b/grafana-dashboards/default/kafka-quotas.json deleted file mode 100644 index 189bcad9..00000000 --- a/grafana-dashboards/default/kafka-quotas.json +++ /dev/null @@ -1,779 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka quotass", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", 
- "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 8, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_server_produce_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 8, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - 
"id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_server_fetch_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Byte Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percent" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 8, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - 
], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_server_request_request_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - 
"datasource": null, - "expr": "topk(10,kafka_server_produce_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce Throttle Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_server_fetch_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", - "format": 
"time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Throttle Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 12, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10,kafka_server_request_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ 
Broker:{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Throttle Time", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka-client", - "kafka-quota" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Broker", - "multi": true, - "name": "broker", - "options": [], - "query": "label_values(kafka_server_produce_byte_rate{env=\"$env\"},hostname)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "User", - "multi": true, - "name": "user", - "options": [], - "query": "label_values(user)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - 
"auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Client ID", - "multi": true, - "name": "client_id", - "options": [], - "query": "label_values(client_id)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka Quotas - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/kafka-topics.json b/grafana-dashboards/default/kafka-topics.json deleted file mode 100644 index 7354ae5c..00000000 --- a/grafana-dashboards/default/kafka-topics.json +++ /dev/null @@ -1,1085 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Kafka topics", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - 
"timeFrom": null, - "timeShift": null, - "title": "Throughput", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - 
"datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum(kafka_log_log_size{env=\"$env\",topic=~\"$topic\"}) by (topic))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Log size", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - 
"drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": 
"linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": 
{ - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce Requests/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "reqps" - }, - "overrides": [] - 
}, - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Consumer Fetch Requests/Sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": true - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 10, - "w": 12, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - 
"interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ - { - "datasource": null, - "expr": "kafka_log_log_logstartoffset{env=\"$env\",topic=~\"$topic\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Start Offsets", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "__name__": true, - "app": true, - "clusterId": true, - "confluentPlatform": true, - "confluent_platform": true, - "controller_revision_hash": true, - "instance": true, - "job": true, - "namespace": true, - "platform_confluent_io_type": true, - "statefulset_kubernetes_io_pod_name": true, - "type": true - }, - "indexByName": { - "Value": 4, - "partition": 3, - "pod": 1, - "topic": 2 - }, - "renameByName": { - "Value": "offset" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "partition" - } - ], - "fields": {} - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "topic" - } - ] - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "partition" - } - ] - } - } - ], - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": true - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 10, - 
"w": 12, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ - { - "datasource": null, - "expr": "kafka_log_log_logendoffset{env=\"$env\",topic=~\"$topic\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{topic}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "End Offsets", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "__name__": true, - "app": true, - "clusterId": true, - "confluentPlatform": true, - "confluent_platform": true, - "controller_revision_hash": true, - "instance": true, - "job": true, - "namespace": true, - "platform_confluent_io_type": true, - "statefulset_kubernetes_io_pod_name": true, - "type": true - }, - "indexByName": { - "Value": 4, - "partition": 3, - "pod": 1, - "topic": 2 - }, - "renameByName": { - "Value": "offset" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "partition" - } - ], - "fields": {} - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "topic" - } - ] - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "partition" - } - ] - } - } - ], - "transparent": false, - "type": "table" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Offsets", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - 
"confluent", - "kafka" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Topic", - "multi": true, - "name": "topic", - "options": [], - "query": "label_values(kafka_log_log_size{env=\"$env\"}, topic)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Kafka topics - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/ksqldb-cluster.json b/grafana-dashboards/default/ksqldb-cluster.json deleted file mode 100644 index 0452da9c..00000000 --- a/grafana-dashboards/default/ksqldb-cluster.json +++ /dev/null @@ -1,3683 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of ksqlDB clusters.", - "editable": true, - 
"gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "ksqlDB online instances returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Online Servers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of active queries deployed in the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Active Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n 
", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "green", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Running Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 
1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ksqlDB: Sum of Rebalancing Queries", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - 
"noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "red", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect: Sum of Queries Failed", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - 
"mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "ksql_ksql_engine_query_stats_liveness_indicator{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Cluster Liveness", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - 
"id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages consumed/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - 
}, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages produced/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "System", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. 
\n If there are multiple cores, the total capacity should be 100% * number_cores.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of seconds used by Garbage Collection.", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": 
"auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - 
"panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - 
"color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - 
"gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Process Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, 
- "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Process Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } 
- }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Commit Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 
8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Commit Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 21, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": 
null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Punctuate Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": 
"single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Punctuate Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Queries Performance", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 23, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - 
"thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 24, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, 
- "w": 8, - "x": 8, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 25, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 7 - }, - "height": null, - "hideTimeOverride": false, - "id": 26, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - 
"minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 27, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": 
"single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put if absent Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 28, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put if absent Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 8 - }, - "height": null, - "hideTimeOverride": false, - "id": 29, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Put if absent Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 30, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_fetch_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 31, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": 
"time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 9 - }, - "height": null, - "hideTimeOverride": false, - "id": 32, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 
10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Fetch Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { 
- "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 34, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - 
"mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 10 - }, - "height": null, - "hideTimeOverride": false, - "id": 35, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Delete Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - 
"gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ops" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Restore Rate", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - 
"scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 37, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Restore Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - 
}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 11 - }, - "height": null, - "hideTimeOverride": false, - "id": 38, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{thread_id}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Restore Latency (Max.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "State Stores", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "ksqldb" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - 
"tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "ksqlDB cluster", - "multi": false, - "name": "ksqldb_cluster", - "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\"},ksqldb_cluster_id)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 2, - "includeAll": false, - "label": "ksqlDB cluster ID", - "multi": false, - "name": "ksqldb_cluster_id", - "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\"},ksql_cluster)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "ksqlDB server", - "multi": true, - "name": "ksqldb_server", - "options": [], - "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}, hostname)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", 
- "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "ksqlDB cluster - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/schema-registry-cluster.json b/grafana-dashboards/default/schema-registry-cluster.json deleted file mode 100644 index caf43878..00000000 --- a/grafana-dashboards/default/schema-registry-cluster.json +++ /dev/null @@ -1,937 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Schema Registry cluster", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Schema Registry online instances returning metrics.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - 
"color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "green", - "index": 2, - "line": true, - "op": "gt", - "value": 2.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(kafka_schema_registry_registered_count{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Online instances", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average number of registered schemas across the cluster.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", 
- "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Registered Schemas (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average number of schemas created, by type.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(kafka_schema_registry_schemas_created{env=\"$env\"}) by (schema_type)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{schema_type}}", - "metric": "", - "refId": "", - "step": 10, - 
"target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Created Schemas by Type (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_schema_registry_schemas_deleted{env=\"$env\"}) by (schema_type)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{schema_type}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Sum of Deleted Schemas by Type", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - 
"yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 16, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "SR: Sum of Active Connections", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "System", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$sr_server\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - 
"type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$sr_server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - 
"gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$sr_server\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "schema-registry" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Server", - "multi": true, - "name": "sr_server", - "options": [], - "query": 
"label_values(kafka_schema_registry_registered_count{env=\"$env\"}, hostname)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Schema Registry cluster - v2", - "uid": null, - "version": 0 -} diff --git a/grafana-dashboards/default/zookeeper-cluster.json b/grafana-dashboards/default/zookeeper-cluster.json deleted file mode 100644 index 465129dc..00000000 --- a/grafana-dashboards/default/zookeeper-cluster.json +++ /dev/null @@ -1,1914 +0,0 @@ -{ - "__inputs": [ - { - "description": "", - "label": "Prometheus", - "name": "DS_PROMETHEUS", - "pluginId": "prometheus", - "pluginName": "Prometheus", - "type": "datasource" - } - ], - "annotations": { - "list": [] - }, - "description": "Overview of the Zookeeper cluster", - "editable": true, - "gnetId": null, - "hideControls": false, - "id": null, - "links": [], - "panels": [ - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 1, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Overview", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": 
"Prometheus", - "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "red", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 2.0, - "yaxis": "left" - }, - { - "color": "green", - "index": 2, - "line": true, - "op": "gt", - "value": 3.0, - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 2, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "count(zookeeper_status_quorumsize{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Quorum Size", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - 
"mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 4, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 3, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "avg(zookeeper_inmemorydatatree_nodecount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: ZNodes (avg.)", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 0.6, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": 
true, - "op": "gt", - "value": 0.8, - "yaxis": "left" - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 8, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 4, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - "fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_numaliveconnections{env=\"$env\"} / zookeeper_maxclientcnxnsperhost{env=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Connections used", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": {}, - "decimals": null, - "mappings": [], - "noValue": "none", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "blue", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 12, - "y": 0 - }, - "height": null, - "hideTimeOverride": false, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "last" - ], - 
"fields": "", - "values": false - }, - "textMode": "auto" - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum(zookeeper_inmemorydatatree_watchcount{env=\"$env\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Sum of watchers", - "transformations": [], - "transparent": false, - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "index": 0, - "line": true, - "op": "gt", - "value": "null", - "yaxis": "left" - }, - { - "color": "yellow", - "index": 1, - "line": true, - "op": "gt", - "value": 1.0, - "yaxis": "left" - }, - { - "color": "red", - "index": 2, - "line": true, - "op": "gt", - "value": 10.0, - "yaxis": "left" - } - ] - }, - "unit": "" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 8, - "x": 16, - "y": 0 - }, - 
"height": null, - "hideTimeOverride": false, - "id": 6, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "last" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_outstandingrequests{env=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}} ({{server_id}}:{{member_type}})", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Outstanding Requests", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": false, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 7, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "System", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": 
"linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 8, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$zk_server\"}[5m])", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": 
"absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 9, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=\"$zk_server\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory usage", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 1 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "maxDataPoints": 100, - 
"maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=\"$zk_server\"}[5m]))", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "GC collection", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 11, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": 
"ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 12, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_minrequestlatency{env=\"$env\"} * zookeeper_ticktime", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Request Latency (Minimum)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 13, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - 
"legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "zookeeper_avgrequestlatency{env=\"$env\"} * zookeeper_ticktime", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Request Latency (Average)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 2 - }, - "height": null, - "hideTimeOverride": false, - "id": 14, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"zookeeper_maxrequestlatency{env=\"$env\"} * zookeeper_ticktime", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "ZK: Request Latency (Maximum)", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Server Latency", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 15, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 3 - }, - "height": null, - 
"hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{env=\"$env\",quantile=~\"$quantile\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Request Latency", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 17, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - 
"displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{env=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Sync Connections/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 16, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": 
"kafka_server_sessionexpirelistener_zookeeperexpirespersec{env=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Expired Connections/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 0, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{env=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - 
"refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Disconnected Connections/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "mode": "normal" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ - { - "datasource": null, - "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{env=\"$env\"}", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{hostname}}", - "metric": "", - "refId": "", - "step": 10, - "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Kafka: Auth Failures on Connections/sec", - "transformations": [], - "transparent": false, - "type": "timeseries" - } - ], - 
"repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Client Latency (Kafka)", - "transformations": [], - "transparent": false, - "type": "row" - } - ], - "refresh": "30s", - "rows": [], - "schemaVersion": 12, - "sharedCrosshair": false, - "style": "dark", - "tags": [ - "confluent", - "kafka", - "zookeeper" - ], - "templating": { - "list": [ - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Environment", - "multi": false, - "name": "env", - "options": [], - "query": "label_values(env)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": true, - "label": "Server", - "multi": true, - "name": "zk_server", - "options": [], - "query": "label_values(zookeeper_outstandingrequests{env=\"$env\"}, hostname)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - }, - { - "allValue": null, - "auto": false, - "auto_count": 30, - "auto_min": "10s", - "current": { - "selected": false, - "tags": [], - "text": null, - "value": null - }, - "datasource": "Prometheus", - "hide": 0, - "includeAll": false, - "label": "Quantile", - "multi": false, - "name": "quantile", - "options": [], - "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{env=\"$env\"}, quantile)", - "refresh": 1, - "regex": null, - "sort": 1, - "tagValuesQuery": null, - "tagsQuery": null, - "type": "query", - "useTags": false - } - ] - 
}, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "hidden": false, - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Zookeeper cluster - v2", - "uid": null, - "version": 0 -} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json index d0cb048c..4e89ac2d 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json @@ -1,78 +1,103 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the main health-check metrics from Confluent Platform components.", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 3, - "iteration": 1634040033398, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Zookeeper", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Zookeeper cluster", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Quorum Size of Zookeeper ensemble", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 3 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" } ] }, @@ -81,70 +106,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, 
"targets": [ { - "expr": "count(zookeeper_status_quorumsize{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper nodes online", + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -153,158 +189,180 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": 
"avg(zookeeper_inmemorydatatree_nodecount{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of ZNodes", + "title": "ZK: ZNodes (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Alive Connections", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 200 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 4, "interval": null, "links": [], 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_numaliveconnections{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "zookeeper_numaliveconnections{env=\"$env\"} / zookeeper_maxclientcnxnsperhost{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Alive Connections", + "title": "ZK: Connections used", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Watchers", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 1000 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -313,170 +371,246 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 1 + "y": 0 }, - "id": 22, + "height": null, + "hideTimeOverride": false, + "id": 
5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_inmemorydatatree_watchcount{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of Watchers", + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Alive Connections", + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": 
"linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 200 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 8, "x": 16, - "y": 1 + "y": 0 }, - "id": 24, + "height": null, + "hideTimeOverride": false, + "id": 6, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "right" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "zookeeper_outstandingrequests{job=\"zookeeper\",env=\"$env\"}", - "instant": true, + "datasource": null, + "expr": "zookeeper_outstandingrequests{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} ({{member_type}})", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{pod}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outstanding Requests", - "type": "stat" + "title": "ZK: Outstanding 
Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 5 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Kafka Cluster", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Kafka cluster", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of active controllers in the cluster.", + "description": "Count of brokers available (online).\n This value is referential and should not be used for alerting.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "#e5ac0e", - "value": 2 - }, - { - "color": "#bf1b00" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -485,80 +619,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 6 + "y": 1 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 8, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "vertical", + "orientation": 
"auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "value_and_name" + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_controller_kafkacontroller_activecontrollercount{job=\"kafka-broker\",env=\"$env\"} > 0", + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Active Controllers", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Brokers Online", + "description": "Active Controller broker.\n It should always be 1. 
If the value is different than 1, then it must be alerted for troubleshooting.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 2 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -567,82 +702,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 6 + "y": 1 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.1.3", "repeat": null, - "repeatDirection": "h", + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(kafka_server_replicamanager_leadercount{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{env=\"$env\"} > 0", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Brokers Online", + "timeFrom": null, + "timeShift": null, + "title": 
"Kafka: Active Controller", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Partitions that are online", + "description": "Sum of Topic partitions across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 0 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -651,80 +785,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 6 + "y": 1 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 10, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_server_replicamanager_partitioncount{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": 
"Online Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", + "description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 5 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -733,81 +876,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 6 + "y": 1 }, - "id": 10, + "height": null, + "hideTimeOverride": false, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + 
"repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{env=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Replicated Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of partitions under min insync replicas.", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n It's recommended alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 5 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -816,81 +967,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 16, - "y": 6 + "y": 1 }, + "height": null, + "hideTimeOverride": false, "id": 12, "interval": 
null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_cluster_partition_underminisr{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{env=\"$env\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Min ISR Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-MinISR Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of partitions that dont have an active leader and are hence not writable or readable.", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n It's recommended alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "#ef843c", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - 
"color": "#bf1b00", - "value": 1 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -899,661 +1058,138 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 20, - "y": 6 + "y": 1 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{job=\"kafka-broker\",env=\"$env\"})", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Offline Partitions Count", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Offline Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 10 - }, - "id": 26, - "panels": [], - "title": "Shema Registry", - "type": "row" - }, - { - "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 1 - } - ] + "steps": [] } - }, - 
"overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 11 - }, - "id": 30, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "count(kafka_schema_registry_registered_count{job=\"schema-registry\",env=\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schema Registry Instances", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 11 - }, - "id": 28, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_schema_registry_registered_count{job=\"schema-registry\",env=\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schemas registered", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 11 - }, - "id": 33, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - 
], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_schema_registry_schemas_deleted{job=\"schema-registry\",env=\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schemas deleted", - "type": "stat" - }, - { - "collapsed": false, - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 15 - }, - "id": 37, - "panels": [], - "repeat": "cluster", - "title": "Kafka Connect ($kafka_connect_cluster_id) ", - "type": "row" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 16 + "y": 2 }, - "id": 39, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "count(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect worker instances", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 16 - }, - "id": 48, - "options": { - "colorMode": "value", - "graphMode": 
"none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Total", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 16 - }, - "id": 41, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Running", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 12, - "y": 16 - }, - "id": 43, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - 
"orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Paused", - "transformations": [], - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [], - "noValue": "0", - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 16 - }, - "id": 45, - "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Tasks Failed", - "transformations": [], - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Time since last rebalance", - "fieldConfig": { - "defaults": { - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - 
"color": "#299c46", - "value": null - } - ] - }, - "unit": "clockms" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 16 - }, - "id": 47, + "height": null, + "hideTimeOverride": false, + "id": 14, "interval": null, "links": [], "maxDataPoints": 100, - "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "repeat": "instance", - "targets": [ - { - "exemplar": true, - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",job=\"connect\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"} >= 0", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "title": "Time since last rebalance ", - "type": "stat" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 52, + "maxPerRow": null, + "minSpan": null, "panels": [], - "repeat": "clusterid", - "title": "ksqlDB Cluster ($ksqldb_cluster_id) ", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Schema Registry cluster", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average number of active queries per server.", + "description": "Schema Registry online instances returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": 
"none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 1 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" } ] }, @@ -1562,77 +1198,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 21 + "y": 2 }, - "id": 50, + "height": null, + "hideTimeOverride": false, + "id": 15, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "ksqlDB instances", + "title": "SR: Online instances", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average number of active queries per server.", + "description": "Average number of registered schemas across the cluster.\n 
", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1641,77 +1281,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 21 + "y": 2 }, - "id": 53, + "height": null, + "hideTimeOverride": false, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_num_active_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "instant": true, + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Active Queries", + "title": "SR: Registered Schemas (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of created queries", + "description": 
"Average number of schemas created, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 800 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1720,76 +1364,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 21 + "y": 2 }, - "id": 55, + "height": null, + "hideTimeOverride": false, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_running_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Running Queries", + "title": "SR: Schemas Created by Type (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, 
"datasource": "Prometheus", - "description": "Num of rebalancing queries", + "description": "Average number of schemas deleted, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -1798,300 +1447,1235 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 21 + "y": 2 }, - "id": 57, + "height": null, + "hideTimeOverride": false, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rebalancing Queries", + "title": "SR: Schemas Deleted by Type 
(avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of error query", + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 2 - } - ] - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 21 + "h": 1, + "w": 24, + "x": 0, + "y": 3 }, - "id": 59, + "height": null, + "hideTimeOverride": false, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Kafka Connect online workers returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": 
[ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "count(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, + "type": "stat" }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ { - "exemplar": true, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "interval": "", - "legendFormat": "", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + 
"datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Running Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Paused Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + 
"datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Sum of Failed Tasks", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Informative value. 
Time since last rebalance.\n When this value is continuously and repeatedly low, it means some connectors are failing and rebalancing is triggered constantly.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "clockms" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 20, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "none", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",kafka_connect_cluster_id=~\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connect: Time since last rebalance", + "transformations": [], + "transparent": false, + "type": "stat" } ], + "repeat": "connect_cluster", + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Queries in Error State", - "type": "stat" + "title": "Kafka Connect cluster: $connect_cluster", + "transformations": [], + "transparent": false, + "type": "row" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Num of not running queries", +
"collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 21 + "h": 1, + "w": 24, + "x": 0, + "y": 4 }, - "id": 61, + "height": null, + "hideTimeOverride": false, + "id": 26, "interval": null, "links": [], "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "ksqlDB online instances returning metrics.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": 
null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "fields": "", - "values": false + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Online instances", + "transformations": [], + "transparent": false, + "type": "stat" }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ { - "exemplar": true, - "expr": "sum(ksql_ksql_engine_query_stats_not_running_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\"})", - "interval": "", - "legendFormat": "", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of active queries deployed in the cluster.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 4, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, 
+ "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 12, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, + "type": "stat" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Number of queries failed in the cluster.\n Ideally, this number should 
be equal to zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 5, + "w": 4, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "last" + ], + "fields": "", + "values": false + }, + "textMode": "auto" + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ksqlDB: Sum of Queries Failed", + "transformations": [], + "transparent": false, + "type": "stat" } ], + "repeat": "ksqldb_cluster", + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Stopped Queries", - "type": "stat" + "title": "ksqlDB cluster: $ksqldb_cluster", + "transformations": [], + "transparent": false, + "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false,
"style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka", + "zookeeper", + "kafka-connect", + "schema-registry", + "ksqldb" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, - "datasource": null, - "definition": "label_values(env)", - "description": null, - "error": null, + "datasource": "Prometheus", "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "isNone": true, "selected": false, - "text": "None", - "value": "" + "tags": [], + "text": null, + "value": null }, - "datasource": null, - "definition": "label_values(kafka_connect_cluster_id)", - "description": null, - "error": null, - "hide": 0, + "datasource": "Prometheus", + "hide": true, "includeAll": false, - "label": "Kafka Connect Cluster ID", + "label": "Kafka Connect cluster", "multi": false, - "name": "kafka_connect_cluster_id", + "name": "connect_cluster", "options": [], - "query": { - "query": "label_values(kafka_connect_cluster_id)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_count{env=\"$env\"}, kafka_connect_cluster_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", 
"current": { - "isNone": true, "selected": false, - "text": "None", - "value": "" + "tags": [], + "text": null, + "value": null }, - "datasource": null, - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "description": null, - "error": null, - "hide": 0, + "datasource": "Prometheus", + "hide": true, "includeAll": false, - "label": "ksqlDB Cluster ID", + "label": "ksqlDB cluster", "multi": false, - "name": "ksqldb_cluster_id", + "name": "ksqldb_cluster", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator{env=\"$env\"}, ksqldb_cluster_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", - "title": "Confluent Platform overview", - "uid": "JiqnBMNnz", - "version": 1 -} \ No newline at end of file + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Confluent Platform overview - v2", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json index 3be6d2a8..1c7f6dbe 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json @@ -1,75 +1,87 @@ { + "__inputs": [ 
+ { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Kafka resource usage and throughput", + "description": "Overview of the Kafka cluster", "editable": true, - "gnetId": 721, - "graphTooltip": 0, - "id": 6, - "iteration": 1647427255896, + "gnetId": null, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Healthcheck", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Cluster Overview", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of active controllers in the cluster.", + "description": "Count of brokers available (online).\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - 
"value": null - }, - { - "color": "#e5ac0e", - "value": 2 - }, - { - "color": "#bf1b00" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -78,79 +90,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "vertical", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "value_and_name" + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_controller_kafkacontroller_activecontrollercount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"} > 0", + "datasource": null, + "expr": "count(kafka_server_replicamanager_leadercount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Active Controllers", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Online Brokers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Brokers Online", + "description": "Active Controller broker.\n It should always be 1. 
If the value is different than 1, then it must be alerted for troubleshooting.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 2 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -159,81 +173,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.1.3", "repeat": null, - "repeatDirection": "h", + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "count(kafka_server_replicamanager_leadercount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "kafka_controller_kafkacontroller_activecontrollercount{env=\"$env\"} > 0", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Brokers Online", + "timeFrom": null, + "timeShift": null, + "title": 
"Kafka: Active Controller", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Partitions that are online", + "description": "\n Number of partitions where the preferred replica is not the leader.\n Usually, this number is 0.\n Restarting nodes could cause this values to change, but when reassigning happens the value stabilize.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 0 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -242,78 +256,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 4, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_replicamanager_partitioncount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{env=\"$env\"})", "format": "time_series", - "instant": true, + "hide": false, + 
"instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Online Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Preferred Replica Imbalance", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Number of topics in the cluster.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 - }, - { - "color": "#d44a3a" + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -322,266 +339,247 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, "y": 1 }, - "id": 33, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_controller_kafkacontroller_preferredreplicaimbalancecount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{env=\"$env\"})", 
"format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Preferred Replica Imbalance", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Topics", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Sum of requests per second rated over a 5 min. period.\n Gives an idea of the processing load in the cluster.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "Bps" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, + "h": 5, + "w": 4, "x": 16, - "y": 1 + "y": 0 }, - "id": 84, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { 
"calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "Bytes in", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - }, - { - "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", "format": "time_series", "hide": false, + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "Bytes out", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "B", - "step": 4 + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Broker network throughput", - "type": "timeseries" + "title": "Kafka: Rate of Requests/Sec", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of under-replicated partitions (| ISR | < | all replicas |).", + "description": "Sum of log sizes per broker.\n This must be compared with the total storage space available in the brokers.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + 
"noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#bf1b00", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 20, + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 7, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, - "textMode": "auto" + "textMode": "value_and_name" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_replicamanager_underreplicatedpartitions{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_log_log_size{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "", - "refId": "A" + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Replicated Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Log Size", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of partitions under min insync replicas.", + "description": "Sum of Topic partitions across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": 
"thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#bf1b00", - "value": 5 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -590,80 +588,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 4, - "y": 5 + "x": 0, + "y": 1 }, - "id": 32, + "height": null, + "hideTimeOverride": false, + "id": 8, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_cluster_partition_underminisr{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_server_replicamanager_partitioncount{env=\"$env\",hostname=~\"$broker\"})", "format": "time_series", "hide": false, - "instant": true, + "instant": false, "interval": "", "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Under Min ISR Partitions", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of partitions that dont have an active leader and are hence not writable or readable.", + 
"description": "Sum of Under-Replicated Partitions. This is caused by broker or volumes unavailable, impacting replicas to be offline, and reducing the ISR set for those partitions.\n There are transient scenarios that could lead to this number growing (e.g. broker restart), but if the number doesn't shrink in a short period of time (e.g. 1 minute), then it's recommended to alert.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#508642", - "value": null - }, - { - "color": "#ef843c", - "value": 1 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#bf1b00", - "value": 1 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -672,78 +679,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 8, - "y": 5 + "x": 4, + "y": 1 }, - "id": 22, + "height": null, + "hideTimeOverride": false, + "id": 9, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": 
"sum(kafka_server_replicamanager_underreplicatedpartitions{env=\"$env\",hostname=~\"$broker\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Offline Partitions Count", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-Replicated Partitions (URP)", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Unclean leader election rate", + "description": "Number of partitions where the number of replicas offline is higher than the minimum ISR configuration.\n This means partitions are not available for Producers with acks=all.\n We recommend alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#d44a3a" + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -752,82 +770,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 12, - "y": 5 + "x": 8, + "y": 1 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 10, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - 
"orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_controller_controllerstats_uncleanleaderelectionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_cluster_partition_underminisr{env=\"$env\",hostname=~\"$broker\"})", "format": "time_series", - "instant": true, + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Unclean Leader Election Rate", + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sum of Under-MinISR Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 31, - "panels": [], - "title": "Request rate", - "type": "row" - }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Produce request rate.", + "description": "Number of partitions where all replicas are offline.\n Producers and Consumers are affected by this condition.\n We recommend alerting when this values is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ 
-836,184 +861,272 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 10 + "x": 12, + "y": 1 }, - "id": 93, + "height": null, + "hideTimeOverride": false, + "id": 11, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m]))", + "datasource": null, + "expr": "sum(kafka_controller_kafkacontroller_offlinepartitionscount{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "All Request Per Sec", + "title": "Kafka: Sum of Offline Partitions", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Produce request rate.", + "description": "Sum of bytes in per second rated over a 5 min. 
period.\n Gives an idea of the incoming throughput handle by the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 4, - "y": 10 + "x": 16, + "y": 1 }, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 12, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Produce\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce Request Per Sec", + "title": "Kafka: Bytes In/Sec", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Fetch request rate.", + "description": "Sum of bytes out 
per second rated over a 5 min. period.\n Gives an idea of the outgoing throughput handle by the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "none" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 8, - "y": 10 + "x": 20, + "y": 1 }, - "id": 37, + "height": null, + "hideTimeOverride": false, + "id": 13, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"FetchConsumer\"}[5m]))", + "datasource": null, + "expr": "sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer Fetch Request Per Sec", + "title": "Kafka: Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System resources", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1035,14 +1148,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, "showPoints": "auto", "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -1050,33 +1161,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } + "steps": [] + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 10 + "h": 10, + "w": 8, + "x": 0, + "y": 2 }, - "id": 122, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1085,1544 +1195,268 @@ "mode": "single" } }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - 
"expr": "rate(kafka_network_requestmetrics_errorspersec{error!=\"NONE\"}[5m])", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{error}} @ {{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Errors", + "timeFrom": null, + "timeShift": null, + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Fetch request rate.", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 0, - "y": 14 + "h": 10, + "w": 8, + "x": 8, + "y": 2 }, - "id": 94, + "height": null, + "hideTimeOverride": false, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "area", - 
"justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Fetch\"}[5m]))", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Broker Fetch Request Per Sec", - "type": "stat" + "title": "Memory usage", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Offset Commit request rate.", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], 
"thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "none" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 14 + "h": 10, + "w": 8, + "x": 16, + "y": 2 }, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"OffsetCommit\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Offset Commit Request Per Sec", - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Metadata request rate.", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 8, - "y": 14 - }, - "id": 36, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - 
"colorMode": "value", - "graphMode": "area", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(rate(kafka_network_requestmetrics_requestspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",request=\"Metadata\"}[5m]))", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Metadata Request Per Sec", - "type": "stat" - }, - { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 40, - "panels": [], - "title": "System", - "type": "row" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Cores", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#629E51", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 19 - }, - "id": 27, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": 
"table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "irate(process_cpu_seconds_total{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])*100", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "process_cpu_secondspersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Memory", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#BA43A9", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 19 - }, - "id": 2, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", - "interval": "", - 
"intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "jvm_memory_bytes_used", - "refId": "A", - "step": 4 - }, - { - "expr": "jvm_memory_bytes_max{job=\"kafka-broker\",area=\"heap\",env=\"$env\",instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM Memory Used", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "% time in GC", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "localhost:7071" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#890F02", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 19 - }, - "id": 3, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": 
"jvm_gc_collection_seconds_sum", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Time spent in GC", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 29, - "panels": [ - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Messages/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 24 - }, - "id": 4, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_messagesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - 
"axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 24 - }, - "id": 5, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, 
- "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 24 - }, - "id": 6, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(instance,topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",topic!=\"\"}[5m]))", - "interval": "", - "intervalFactor": 2, - "legendFormat": "bytes/sec", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Messages/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "cps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 32 - }, - "id": 10, - 
"links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "kafka_server_brokertopicmetrics_messagesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Messages In Per Broker", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Bytes/s", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 32 - }, - "id": 7, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum 
without(topic)(rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{instance}}", - "metric": "kafka_server_brokertopicmetrics_bytesinpersec", - "refId": "A", - "step": 4 - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes In Per Broker", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "binBps" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 32 - }, - "id": 9, - "links": [], - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(topic)(rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",topic!=\"\"}[5m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Bytes Out Per Broker", - "type": "timeseries" - } - 
], - "title": "Throughput In/Out", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 44, - "panels": [ - { - "datasource": "Prometheus", - "description": "Average fraction of time the network processor threads are idle. Values are between 0 (all resources are used) and 100 (all resources are available)\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 24, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Processor Avg Usage Percent", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "Average fraction of time the request handler threads are idle. 
Values are between 0 (all resources are used) and 100 (all resources are available).\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 25 - }, - "id": 25, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "1 - kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Handler Avg Percent", - "type": "timeseries" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "Thread utilization", - "type": "row" + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + 
"defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 29 + "y": 3 }, - "id": 86, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Latency in millseconds for ZooKeeper requests from broker.\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 26 - }, - "id": 88, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\"}", - "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Zookeeper Request Latency", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - 
"defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 26 - }, - "id": 92, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", - "hide": false, - "instant": false, - "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Zookeeper connections per sec", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "", + "description": "Number of messages into topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2633,7 +1467,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2644,51 +1478,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": 
"never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 0, - "y": 35 + "y": 3 }, - "id": 89, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2697,24 +1525,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper expired connections per sec", + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Number of bytes into topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2725,7 +1567,7 @@ "axisPlacement": "auto", 
"barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2736,51 +1578,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 8, - "y": 35 + "y": 3 }, - "id": 90, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2789,24 +1625,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper disconnect per sec", + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": 
"Prometheus", - "description": "", + "description": "Number of bytes out of topics per second, aggregated by sum without topic.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2817,7 +1667,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2828,51 +1678,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, "x": 16, - "y": 35 + "y": 3 }, - "id": 91, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2881,39 +1725,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}", + "datasource": null, + "expr": "sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + 
"step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper auth failures per sec", + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Zookeeper", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Throughput", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 30 + "y": 4 }, - "id": 82, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). 
No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention.The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", + "description": "Percent of time the network thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2924,7 +1809,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2935,51 +1820,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 27 + "y": 4 }, - "id": 80, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2988,23 +1867,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "rate(kafka_server_replicamanager_isrshrinkspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])", + "datasource": null, 
+ "expr": "1-kafka_network_socketserver_networkprocessoravgidlepercent{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IsrShrinks per Sec", + "title": "Network processor usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": ": The number of in-sync replicas (ISRs) for a particular partition should remain fairly static, the only exceptions are when you are expanding your broker cluster or removing partitions. In order to maintain high availability, a healthy Kafka cluster requires a minimum number of ISRs for failover. A replica could be removed from the ISR pool for a couple of reasons: it is too far behind the leaders offset (user-configurable by setting the replica.lag.max.messages configuration parameter), or it has not contacted the leader for some time (configurable with the replica.socket.timeout.ms parameter). 
No matter the reason, an increase in IsrShrinksPerSec without a corresponding increase in IsrExpandsPerSec shortly thereafter is cause for concern and requires user intervention.The Kafka documentation provides a wealth of information on the user-configurable parameters for brokers.", + "description": "Percent of time the IO thread pool is used.\n It should be below 60% or the capacity of threads should be tuned or \n the cluster scaled to cope with the load.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3015,7 +1909,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3026,51 +1920,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 27 + "h": 10, + "w": 8, + "x": 8, + "y": 4 }, - "id": 83, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3079,38 +1967,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "rate(kafka_server_replicamanager_isrexpandspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}[5m])", + 
"datasource": null, + "expr": "1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IsrExpands per Sec", + "title": "Request processor (IO) usage", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Isr Shrinks / Expands", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Thread utilization", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 5 }, - "id": 53, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Requests per second rated over a 5 minutes period.\n Includes API call and version.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3121,7 +2051,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3132,50 +2062,48 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, 
"thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 28 + "y": 5 }, - "id": 55, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3184,21 +2112,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_log_log_size{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}) by (topic)", - "legendFormat": "{{topic}}", - "refId": "A" + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_requestspersec{env=\"$env\",hostname=~\"$broker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{request}}(v{{version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Log size per Topic", + "title": "Requests rates", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Request Errors per second rated over a 5 minutes period.\n Includes API call and version.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3209,7 +2154,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3220,50 
+2165,48 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 28 + "h": 10, + "w": 8, + "x": 8, + "y": 5 }, - "id": 56, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3272,37 +2215,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_log_log_size{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"}) by (instance)", - "legendFormat": "{{instance}}", - "refId": "A" + "datasource": null, + "expr": "sum without(pod,instance,statefulset_kubernetes_io_pod_name)(rate(kafka_network_requestmetrics_errorspersec{env=\"$env\",hostname=~\"$broker\",error!=\"NONE\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{error}}@{{request}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Log size per Broker", + "title": "Error rates", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Logs size", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": 
"Request rates", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 6 }, - "id": 58, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", + "description": "Sum of connections count across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3313,7 +2299,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3324,75 +2310,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 29 + "y": 6 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", 
"mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - RequestQueueTimeMs", + "title": "Sum of Connections alive per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. 
In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "description": "Sum of rate of connections created across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3403,7 +2399,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3414,75 +2410,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 29 + "h": 10, + "w": 8, + "x": 8, + "y": 6 }, - "id": 61, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - 
"refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - LocalTimeMs", + "title": "Sum of Connections creation rate per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "description": "Sum of rate of connections closed across cluster by brokers", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3493,7 +2499,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3504,50 +2510,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 38 + "x": 16, + "y": 6 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, 
+ "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3556,23 +2557,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{env=\"$env\",hostname=~\"$broker\"}) by (hostname)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - RemoteTimeMs", + "title": "Sum of Connections close rate per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "description": "Sum of connections count across cluster by listeners", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3583,7 +2599,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3594,50 +2610,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], 
"thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 38 + "x": 0, + "y": 7 }, - "id": 63, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3646,23 +2657,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_count{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - ResponseQueueTimeMs", + "title": "Sum of Connections alive per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. 
If the network buffer gets full, Kafka will block.\n", + "description": "Sum of rate of connections created across cluster by listener", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3673,7 +2699,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3684,51 +2710,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 16, - "y": 38 + "x": 8, + "y": 7 }, - "id": 64, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3737,38 +2757,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Produce\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": 
"{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Producer - ResponseSendTimeMs", + "title": "Sum of Connections creation rate per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Producer Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 33 - }, - "id": 68, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. The request queue size should match the number of connections.", + "description": "Sum of rate of connections closed across cluster by listener", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3779,7 +2799,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3790,76 +2810,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 30 + "h": 10, + "w": 8, + "x": 16, + "y": 7 }, - "id": 69, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - 
"lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{env=\"$env\",hostname=~\"$broker\"}) by (listener)", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{listener}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - RequestQueueTimeMs", + "title": "Sum of Connections close rate per Listener", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. 
One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "description": "Rate of ISR shrinks per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3870,7 +2941,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3881,75 +2952,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 30 + "h": 10, + "w": 8, + "x": 0, + "y": 8 }, - "id": 70, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": 
"rate(kafka_server_replicamanager_isrshrinkspersec{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - LocalTimeMs", + "title": "Rate of ISR Shrinks/sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "description": "Rate of ISR expands per second.\n If this value is continuously higher than 0, then troubleshoot cause of ISR changing constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3960,7 +3041,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3971,50 +3052,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, 
"overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 0, - "y": 39 + "x": 8, + "y": 8 }, - "id": 71, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4023,23 +3099,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "rate(kafka_server_replicamanager_isrexpandspersec{env=\"$env\",hostname=~\"$broker\"}[5m])", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - RemoteTimeMs", + "title": "Rate of ISR Expands/sec", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "In-Sync Replicas", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + 
"cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "description": "Time expend on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4050,7 +3183,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4061,51 +3194,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 8, - "y": 39 + "x": 0, + "y": 9 }, - "id": 72, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4114,23 +3241,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": 
"kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - ResponseQueueTimeMs", + "title": "Produce: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. If the network buffer gets full, Kafka will block.\n", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4141,7 +3283,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4152,51 +3294,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 16, - "y": 39 + "x": 8, + "y": 9 }, - "id": 73, + "height": null, + 
"hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4205,38 +3341,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"Fetch\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer - ResponseSendTimeMs", + "title": "Produce: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Consumer Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 34 - }, - "id": 66, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough IO threads or the CPU is a bottleneck, or the request queue isnt large enough. 
The request queue size should match the number of connections.", + "description": "Time spent waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4247,7 +3383,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4258,76 +3394,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 0, - "y": 31 + "h": 10, + "w": 8, + "x": 16, + "y": 9 }, - "id": 74, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_requestqueuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A"
+ "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - RequestQueueTimeMs", + "title": "Produce: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "In most cases, a high value can imply slow local storage or the storage is a bottleneck. One should also investigate LogFlushRateAndTimeMs to know how long page flushes are taking, which will also indicate a slow disk. In the case of FetchFollower requests, time spent in LocalTimeMs can be the result of a ZooKeeper write to change the ISR.", + "description": "Time spent waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4338,7 +3483,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4349,76 +3494,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, - "w": 12, - "x": 12, - "y": 31 + "h": 10, + "w": 8, + "x": 0, + "y": 10 }, - "id": 75, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": {
"legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_localtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - LocalTimeMs", + "title": "Produce: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply a slow network connection. For fetch request, if the remote time is high, it could be that there is not enough data to give in a fetch response. This can happen when the consumer or replica is caught up and there is no new incoming data. If this is the case, remote time will be close to the max wait time, which is normal. 
Max wait time is configured via replica.fetch.wait.max.ms and fetch.max.wait.ms.\n", + "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4429,7 +3583,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4440,50 +3594,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 0, - "y": 40 + "x": 8, + "y": 10 }, - "id": 76, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4492,23 +3641,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_remotetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Produce\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, +
"interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - RemoteTimeMs", + "title": "Produce: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Producer", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply there aren't enough network threads or the network cant dequeue responses quickly enough, causing back pressure in the response queue.\n", + "description": "Time spent on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4519,7 +3725,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4530,50 +3736,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, -
"links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 8, - "y": 40 + "x": 0, + "y": 11 }, - "id": 77, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4582,23 +3783,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsequeuetimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - ResponseQueueTimeMs", + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "A high value can imply the zero-copy from disk to the network is slow, or the network is the bottleneck because the network cant dequeue responses of the TCP socket as quickly as theyre being created. 
If the network buffer gets full, Kafka will block.\n", + "description": "Time expend doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4609,7 +3825,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4620,51 +3836,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 8, - "x": 16, - "y": 40 + "x": 8, + "y": 11 }, - "id": 78, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4673,37 +3883,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_requestmetrics_responsesendtimems{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\",quantile=~\"$percentile\",request=\"FetchFollower\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", "hide": false, - "legendFormat": "{{instance}} - {{quantile}}", - "refId": "A" + "instant": false, + "interval": "", 
+ "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "FetchFollower - ResponseSendTimeMs", + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Fetch Follower Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 35 - }, - "id": 102, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time spent waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4714,7 +3925,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4725,50 +3936,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 32 + "h": 10, + "w": 8, + "x": 16, + "y": 11 }, - "id": 98, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4777,22 +3983,38 @@ "mode": "single" } }, - "pluginVersion":
"8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_count{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections count per listener", + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time spent waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4803,7 +4025,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4814,50 +4036,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 32 + "h": 10, + "w": 8, + "x": 0, + "y": 12 }, - "id": 100, + "height": null, + "hideTimeOverride": false, + "id":
48, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4866,22 +4083,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_count{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections count per broker", + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4892,7 +4125,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4903,50 +4136,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color":
"green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 40 + "h": 10, + "w": 8, + "x": 8, + "y": 12 }, - "id": 104, + "height": null, + "hideTimeOverride": false, + "id": 49, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -4955,22 +4183,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"Fetch\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections creation rate per listener", + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Consumer Fetch", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, 
+ "x": 0, + "y": 13 + }, + "height": null, + "hideTimeOverride": false, + "id": 50, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time spent on the request queue.\n Moved from network socket to request queue by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4981,7 +4267,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4992,50 +4278,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 40 + "h": 10, + "w": 8, + "x": 0, + "y": 13 }, - "id": 106, + "height": null, + "hideTimeOverride": false, + "id": 51, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5044,22 +4325,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_creation_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)", + "datasource": null, + "expr":
"kafka_network_requestmetrics_requestqueuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections creation rate per instance", + "title": "Fetch: Request Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time spent doing local IO.\n Moved from request queue to storage device operations by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5070,7 +4367,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5081,50 +4378,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 48 + "h": 10, + "w": 8, + "x": 8, + "y": 13 }, - "id": 108, + "height": null, + "hideTimeOverride": false, + "id": 52, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement":
"bottom" @@ -5133,22 +4425,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (listener)", + "datasource": null, + "expr": "kafka_network_requestmetrics_localtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{listener}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections close rate per listener", + "title": "Fetch: Local Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time spent waiting for coordination with other brokers/internal condition.\n At purgatory.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5159,7 +4467,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5170,50 +4478,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 + "h": 10, + "w": 8, + "x":
16, + "y": 13 }, - "id": 110, + "height": null, + "hideTimeOverride": false, + "id": 53, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5222,23 +4525,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connection_close_rate{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (instance)", + "datasource": null, + "expr": "kafka_network_requestmetrics_remotetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections close rate per instance", + "title": "Fetch: Remote Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Tracks the amount of time Acceptor is blocked from accepting connections. 
See KIP-402 for more details.", + "description": "Time spent waiting in response queue.\n Moved from purgatory to response queue by IO threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5249,7 +4567,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5260,51 +4578,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percent" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 56 + "y": 14 }, - "id": 112, + "height": null, + "hideTimeOverride": false, + "id": 54, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5313,22 +4625,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_network_acceptor_acceptorblockedpercent{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsequeuetimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}} - {{listener}}", - "refId": "A" + "intervalFactor": 2, +
"legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Acceptor Blocked Percentage", + "title": "Fetch: Response Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Time spent delivering response.\n Moved from response queue to client by Network threads.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5339,7 +4667,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5350,50 +4678,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 56 + "h": 10, + "w": 8, + "x": 8, + "y": 14 }, - "id": 114, + "height": null, + "hideTimeOverride": false, + "id": 55, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5402,38 +4725,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connections{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (client_software_name,
client_software_version)", + "datasource": null, + "expr": "kafka_network_requestmetrics_responsesendtimems{env=\"$env\",hostname=~\"$broker\",quantile=~\"$quantile\",request=\"FetchFollower\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_software_name}} {{client_software_version}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{quantile}}th)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connections per client version", + "title": "Fetch: Response Send Time", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Connections", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Request latency: Replica Fetch", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 36 + "y": 15 }, - "id": 120, + "height": null, + "hideTimeOverride": false, + "id": 56, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of consumer groups per group coordinator", + "description": "Number of groups managed by Broker", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5444,7 +4809,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5455,50 +4820,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": 
"never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 33 + "y": 15 }, - "id": 116, + "height": null, + "hideTimeOverride": false, + "id": 57, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5507,24 +4867,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_coordinator_group_groupmetadatamanager_numgroups{env=\"$env\",hostname=~\"$broker\"}", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Consumer groups number per coordinator", + "title": "Number of Groups per Broker", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of consumer group per state", + "description": "Number of stable groups managed by Broker", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5535,7 +4909,7 @@ "axisPlacement": "auto", "barAlignment": 0, 
"drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5546,50 +4920,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 33 + "h": 10, + "w": 8, + "x": 8, + "y": 15 }, - "id": 118, + "height": null, + "hideTimeOverride": false, + "id": 58, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -5598,63 +4969,136 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "stable", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": 
"sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "preparing-rebalance", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "preparing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "dead", - "refId": "C" + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "completing-rebalance", - "refId": "D" + "intervalFactor": 2, + "legendFormat": "completing_rebalance", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "empty", - "refId": "E" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Nb consumer groups per state", + 
"title": "Number of Groups per Broker per Status", + "transformations": [], + "transparent": false, "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Group Coordinator", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 37 + "y": 16 }, - "id": 46, + "height": null, + "hideTimeOverride": false, + "id": 59, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of messages produced converted to match the log.message.format.version.", + "description": "Sum of produce message conversions per second.\n This value increases when the broker receives produce messages from clients using older versions.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5665,7 +5109,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5676,77 +5120,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "opsps" }, "overrides": [] }, "gridPos": { - "h": 8, + 
"h": 10, "w": 8, "x": 0, - "y": 34 + "y": 16 }, - "id": 48, + "height": null, + "hideTimeOverride": false, + "id": 60, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of produced message conversion", + "title": "Sum of Produce conversion rate per sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of messages consumed converted at consumer to match the log.message.format.version.", + "description": "Sum of fetch message conversions per second.\n This value increases when the broker receives fetch messages from clients using older versions.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5757,7 +5209,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5768,255 +5220,292 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": 
"never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "opsps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 34 + "y": 16 }, - "id": 51, + "height": null, + "hideTimeOverride": false, + "id": 61, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{job=\"kafka-broker\",env=\"$env\",instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{env=\"$env\",hostname=~\"$broker\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of consumed message conversion", + "title": "Sum of Fetch conversion rate per sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": null, - "description": "Number of connection per client version", + "datasource": "Prometheus", + "description": "Sum of connections aggregated by client version and name.\n ", + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } }, - "decimals": 0, "mappings": [], - "unit": "short" + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 34 + "y": 16 }, - "id": 96, + "height": null, + "hideTimeOverride": false, + "id": 62, "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", - "reduceOptions": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, "tooltip": { "mode": "single" } }, + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_server_socketservermetrics_connections{job=\"kafka-broker\", env=\"$env\", instance=~\"$instance\"}) by (client_software_name, client_software_version) ", + "datasource": null, + "expr": "sum(kafka_server_socketservermetrics_connections{env=\"$env\",hostname=~\"$broker\"}) by (client_software_name,client_software_version)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_software_name}} - {{client_software_version}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_software_name}} (v{{client_software_version}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], 
"timeFrom": null, "timeShift": null, - "title": "Client version repartition", - "type": "piechart" + "title": "Sum of Connections per version", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Message Conversion", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_server_kafkaserver_brokerstate{env=\"${env}\"}, instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", + "label": "Broker", "multi": true, - "name": "instance", + "name": "broker", "options": [], - "query": { - "query": 
"label_values(kafka_server_kafkaserver_brokerstate{env=\"${env}\"}, instance)", - "refId": "Prometheus-instance-Variable-Query" - }, + "query": "label_values(kafka_server_replicamanager_leadercount{env=\"$env\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": "", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "0.95" - ], - "value": [ - "0.95" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(quantile)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, - "label": "Percentile", - "multi": true, - "name": "percentile", + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", "options": [], - "query": { - "query": "label_values(quantile)", - "refId": "Prometheus-percentile-Variable-Query" - }, + "query": "label_values(kafka_network_requestmetrics_requestqueuetimems{env=\"$env\"}, quantile)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -6042,7 +5531,7 @@ ] }, "timezone": "browser", - "title": "Kafka cluster", - "uid": "qu-QZdfZz", - "version": 2 -} \ No newline at end of file + "title": "Kafka cluster - v2", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json 
b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json index a2983dde..d2e12193 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json @@ -1,370 +1,547 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Monitor Apache Kafka Connect", + "description": "Overview of the Kafka Connect cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 5, - "iteration": 1632255569594, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 199, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "General", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Kafka Connect online workers returning metrics.\n ", + "editable": true, + "error": false, 
"fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 212, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",version!=\"\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Total", + "title": "Connect: Online Workers", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of tasks deployed on Kafka Connect cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": 
"0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 213, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Running", + "title": "Connect: Sum of Total Tasks", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - 
"noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "orange", - "value": 1 + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 215, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Paused", + "title": "Connect: Sum of Running Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", + "editable": 
true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "red", - "value": 1 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 12, - "y": 1 + "y": 0 }, - "id": 214, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Failed", + "title": "Connect: Sum of Paused Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Number 
of Failed Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It is recommended to alert when this value is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "yellow", - "value": 1 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 16, - "y": 1 + "y": 0 }, - "id": 216, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Unassigned", + "title":
"Connect: Sum of Failed Tasks", "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Informative value. Time since last rebalance.\n When this value is continuously and repeatedly low, it means some connectors are failing and rebalancing is triggered constantly.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, + "custom": {}, + "decimals": null, "mappings": [], - "noValue": "0", + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "purple", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "clockms" }, "overrides": [] }, "gridPos": { - "h": 3, + "h": 5, "w": 4, "x": 20, - "y": 1 + "y": 0 }, - "id": 217, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", "graphMode": "none", @@ -372,316 +549,434 @@ "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "instant": true, + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"} >= 0", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", +
"refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Tasks Destroyed", + "title": "Connect: Time since last rebalance", "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] + "align": "auto", + "displayMode": "auto", + "filterable": false }, - { - "matcher": { - "id": "byName", - "options": "running" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] + "thresholds": { + "mode": "absolute", + "steps": [] } - ] + }, + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 7, - "w": 12, + "h": 5, + "w": 24, "x": 0, - "y": 4 + "y": 1 }, - "id": 227, + "height": null, + "hideTimeOverride": false, + "id": 8, "interval": null, "links": [], - "maxDataPoints": 1, + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single" - } + "showHeader": true }, - "pluginVersion": "7.0.5", + "repeat": null, + "repeatDirection": null, + "span": 6, "targets": [ { - "expr": "sum (kafka_connect_connector_metrics{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status=\"running\"})", + "datasource": null, + "expr": "kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",start_time_ms!=\"\"}", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "running", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum (kafka_connect_connector_metrics{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status=\"stopped\"})", + "datasource": null, + "expr": "kafka_connect_app_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",version!=\"\"}", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "stopped", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum (kafka_connect_connector_metrics{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status=\"paused\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "paused", - "refId": "C" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connector repartition per status", - "type": "piechart" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" - 
}, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "destroyed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#B877D9", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "unassigned" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FADE2A", - "mode": "fixed" - } - } - ] - } - ] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 4 - }, - "id": 219, - "interval": null, - "links": [], - "maxDataPoints": 1, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "values": [ - "value", - "percent" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "7.0.5", - "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_startup_success_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "running", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": 
"sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "failed", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "paused", - "refId": "C" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_startup_success_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "unassigned", - "refId": "D" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "sum by (hostname) (kafka_connect_connect_worker_metrics_task_startup_failure_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "destroyed", - "refId": "E" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Task repartition per status", - "type": "piechart" - }, - { - "datasource": "Prometheus", - "description": "Status of connectors over time", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, + "title": "Connect Workers", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "hostname" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "hostname", + "kafka_connect_cluster_id 1", + "start_time_ms", + "version", + "Value #C", + "Value #D", + "Value #E", + "Value #F", + "Value #G", + "Value #H", + "env 1" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "env 1": 0, + "hostname": 2, + "kafka_connect_cluster_id 1": 1, + "start_time_ms": 3, + "version": 4 + }, + "renameByName": { + "Value #C": "connectors", + "Value #D": "conn. success", + "Value #E": "conn. 
failure", + "Value #F": "tasks", + "Value #G": "tasks success", + "Value #H": "tasks failure", + "env 1": "environment", + "hostname": "worker", + "kafka_connect_cluster_id 1": "cluster", + "start_time_ms": "start time", + "version": "version" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": { + "align": "auto", + "displayMode": "auto", + "filterable": false + }, + "thresholds": { + "mode": "absolute", + "steps": [] + } + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 5, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "showHeader": true + }, + "repeat": null, + "repeatDirection": null, + "span": 6, + "targets": [ + { + "datasource": null, + "expr": "kafka_connect_connector_info{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + 
"interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"})", + "format": "table", + "hide": false, + "instant": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connectors", + "transformations": [ + { + "id": "seriesToColumns", + "options": { + "byField": "connector" + } + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "connector", + "Value #B", + "Value #C", + "Value #D", + "Value #E" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "renameByName": { + "Value #B": "tasks", + "Value #C": "running", + "Value #D": "failed", + "Value #E": "paused" + } + } + } + ], + "transparent": false, + "type": "table" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -689,144 +984,88 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, 
- "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "percentunit" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/stopped.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#F2495C", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/paused.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FF9830", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byRegexp", - "options": "/running.*/" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#73BF69", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 6, + "h": 10, "w": 12, "x": 0, - "y": 11 + "y": 3 }, - "id": 228, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum (kafka_connect_connector_metrics{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status!=\"\"}) by (status) ", + 
"datasource": null, + "expr": "kafka_connect_connector_task_metrics_running_ratio{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{status}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Status of connectors", + "title": "Tasks Running Ratio", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Status of tasks over time", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -845,223 +1084,140 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "destroyed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "purple", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "failed" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "red", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "paused" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "orange", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - 
"id": "byName", - "options": "running" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "green", - "mode": "fixed" - } - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "unassigned" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "yellow", - "mode": "fixed" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 6, + "h": 10, "w": 12, "x": 12, - "y": 11 + "y": 3 }, - "id": 226, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_running_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "running", - "refId": "A" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "failed", - "refId": "B" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 
1, - "legendFormat": "paused", - "refId": "C" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "time_series", - "hide": false, - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "destroyed", - "refId": "D" - }, - { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_unassigned_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", + "datasource": null, + "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "unassigned", - "refId": "E" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Status of tasks", + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 4 }, - "id": 221, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": 
"Prometheus", + "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Cores", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1072,50 +1228,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percent" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 18 + "y": 4 }, - "id": 223, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1124,34 +1275,49 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\"}[5m])", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}[5m])", + "format": "time_series", + 
"hide": false, + "instant": false, "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Memory", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1159,53 +1325,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 18 + "y": 4 }, - "id": 224, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1214,2030 +1375,168 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, 
"targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\",area=\"heap\"}", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "% time in GC", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": 
"percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 18 - }, - "id": 225, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\"}[5m]))", - "interval": "", - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM GC time", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 26 - }, - "id": 97, - "panels": [ - { - "columns": [], - "datasource": "Prometheus", - "fontSize": "90%", - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 146, - "pageSize": 100, - "showHeader": true, - "sort": { - "col": 7, - "desc": true - }, - "styles": [ - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "__name__", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, 
- "pattern": "env", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "job", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "client_id", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Startup time", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "MMMM D, YYYY LT", - "decimals": 2, - "mappingType": 1, - "pattern": "start_time_ms", - "thresholds": [], - "type": "date", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #B", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #A", - 
"thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Connector Count", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #C", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Connector Startup Success Total", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #D", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Connector Startup Failure Total", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #E", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Number of rebalances", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #F", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Average time of Rebalances", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #G", - "thresholds": [], - "type": "number", - "unit": "ms", - "valueMaps": [ - { - "text": "0", - "value": "NaN" - }, - { - "text": "N/A", - "value": "null" - } - ] - }, - { - "alias": "Time since last rebalance", - "align": "auto", - 
"colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #H", - "thresholds": [], - "type": "number", - "unit": "ms" - }, - { - "alias": "Worker instance", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Number of tasks", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #I", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Task Startup Success ", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #J", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Task Startup Failure", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #K", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - 
"alias": "", - "align": "right", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "kafka_connect_app_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\",start_time_ms!=\"\"}", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" - }, - { - "expr": "kafka_connect_app_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\",version!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_startup_success_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_connector_startup_failure_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "E" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "I" - }, - { - "expr": "sum by (instance) 
(kafka_connect_connect_worker_metrics_task_startup_success_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "J" - }, - { - "expr": "sum by (instance) (kafka_connect_connect_worker_metrics_task_startup_failure_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "K" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Connect Worker", - "transform": "table", - "transparent": true, - "type": "table-old" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Average number of network operations (reads or writes) on all connections per second", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 35 - }, - "hiddenSeries": false, - "id": 95, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_network_io_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network IO 
Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "ops", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Bytes per second read off all sockets", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 35 - }, - "hiddenSeries": false, - "id": 91, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_incoming_byte_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Incoming Byte Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": 
[] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Average number of outgoing bytes sent per second to all servers", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 35 - }, - "hiddenSeries": false, - "id": 171, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Outgoing Byte Rate", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "Bps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - 
"logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Current number of active connections", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 169, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_connection_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Current number of active connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - 
"aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Connections that failed authentication", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 42 - }, - "hiddenSeries": false, - "id": 170, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_failed_authentication_total{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Failed authentication connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": 
"Connections that were successfully authenticated using SASL or SSL", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 42 - }, - "hiddenSeries": false, - "id": 174, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_successful_authentication_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "instant": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Success authentication connections", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "decimals": 0, - "format": "connections", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Average number of requests sent per second", - "fieldConfig": { - "defaults": { - "links": [] 
- }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 49 - }, - "hiddenSeries": false, - "id": 172, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_request_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Average number of requests", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Responses received and sent per second", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 49 - }, - "hiddenSeries": false, - "id": 173, - "legend": { - "avg": false, - 
"current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_response_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Responses received and sent", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "reqps", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "Prometheus", - "description": "Fraction of time the I/O thread spent doing I/O", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 49 - }, - "hiddenSeries": false, - "id": 93, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": 
"null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "8.1.3", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "kafka_connect_connect_metrics_io_ratio{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",client_id!=\"\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}}-{{client_id}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "IO Ratio", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percentunit", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - } - ], - "title": "Connect Worker", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 27 - }, - "id": 132, - "panels": [ - { - "columns": [], - "datasource": "Prometheus", - "fontSize": "110%", - "gridPos": { - "h": 11, - "w": 24, - "x": 0, - "y": 28 - }, - "id": 129, - "pageSize": 100, - "showHeader": true, - "sort": { - "col": 8, - "desc": true - }, - "styles": [ - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Time", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": 
"auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "__name__", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "class", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_class", - "preserveFormat": false, - "thresholds": [], - "type": "string", - "unit": "short", - "valueMaps": [] - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "env", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "instance", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "job", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Nb of Tasks destroyed", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#B877D9", - "#B877D9" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "mappingType": 1, - "pattern": "Value #B", - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short" - 
}, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #A", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #C", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #D", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "#F2495C" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "status", - "thresholds": [ - "2" - ], - "type": "string", - "unit": "short", - "valueMaps": [ - { - "text": "running", - "value": "1" - }, - { - "text": "paused", - "value": "2" - }, - { - "text": "stopped", - "value": "3" - } - ] - }, - { - "alias": "name", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "type", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD 
HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_type", - "thresholds": [], - "type": "string", - "unit": "short" - }, - { - "alias": "version", - "align": "left", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "connector_version", - "thresholds": [], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of tasks", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #E", - "thresholds": [ - "0", - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of Tasks running", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #F", - "thresholds": [ - "0", - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "Nb of Tasks failed", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#F2495C", - "#F2495C" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "link": false, - "mappingType": 1, - "pattern": "Value #G", - "preserveFormat": false, - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short", - "valueMaps": [ - { - "text": "0", - "value": "null" - } - ] - }, - { - "alias": "Nb of Tasks paused", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#FF9830", - "#FF9830" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #H", - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short", - "valueMaps": [ - { - "text": "0", - 
"value": "null" - } - ] - }, - { - "alias": "", - "align": "auto", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 2, - "mappingType": 1, - "pattern": "Value #I", - "thresholds": [], - "type": "hidden", - "unit": "short" - }, - { - "alias": "Nb of Tasks unassigned", - "align": "auto", - "colorMode": "cell", - "colors": [ - "rgba(50, 172, 45, 0.97)", - "#FADE2A", - "#FADE2A" - ], - "dateFormat": "YYYY-MM-DD HH:mm:ss", - "decimals": 0, - "mappingType": 1, - "pattern": "Value #J", - "thresholds": [ - "1" - ], - "type": "number", - "unit": "short" - }, - { - "alias": "", - "align": "right", - "colorMode": null, - "colors": [ - "rgba(245, 54, 54, 0.9)", - "rgba(237, 129, 40, 0.89)", - "rgba(50, 172, 45, 0.97)" - ], - "decimals": 2, - "pattern": "/.*/", - "thresholds": [], - "type": "number", - "unit": "short" - } - ], - "targets": [ - { - "expr": "label_replace(label_replace(label_replace(kafka_connect_connector_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",status!=\"\"}, \"status\", \"1\", \"status\", \"running\"), \"status\", \"2\", \"status\", \"paused\"), \"status\", \"3\", \"status\", \"stopped\")", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "I" - }, - { - "expr": "kafka_connect_connector_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",connector_type!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - }, - { - "expr": "kafka_connect_connector_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",connector_version!=\"\"}", - 
"format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "C" - }, - { - "expr": "kafka_connect_connector_info{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",connector_class!=\"\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "D" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "hide": false, - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "E" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "F" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "G" - }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "H" + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "expr": "sum by (connector) 
(kafka_connect_connect_worker_metrics_connector_destroyed_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "B" + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" }, - { - "expr": "sum by (connector) (kafka_connect_connect_worker_metrics_connector_unassigned_task_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"})", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "J" + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" ], - "timeFrom": null, - "timeShift": null, - "title": "Connectors", - "transform": "table", - "transformations": [], - "type": "table-old" + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + 
"target": "" } ], - "title": "Connector details", - "type": "row" + "timeFrom": null, + "timeShift": null, + "title": "GC collection", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 6 }, - "id": 234, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Rebalances average time", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3248,8 +1547,8 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 30, - "gradientMode": "opacity", + "fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, @@ -3259,52 +1558,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", + "showPoints": "auto", "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 24, + "h": 10, + "w": 8, "x": 0, - "y": 29 + "y": 6 }, - "id": 209, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" 
], "displayMode": "table", "placement": "bottom" @@ -3313,119 +1605,138 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"}", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_avg{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{kafka_connect_cluster_id}}-{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rebalances average time", + "title": "Batch Size (Avg.)", + "transformations": [], + "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Time since last rebalance", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "decimals": 0, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - 
"color": "#299c46", - "value": null - } - ] + "steps": [] }, - "unit": "clockms" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 24, - "x": 0, - "y": 37 + "h": 10, + "w": 8, + "x": 8, + "y": 6 }, - "id": 230, + "height": null, + "hideTimeOverride": false, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "fieldOptions": { - "calcs": [ - "lastNotNull" - ] - }, - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", - "repeat": "instance", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",job=\"connect\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"} >= 0", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_batch_size_max{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "title": "($instance) Time since last rebalance ", - "type": "stat" - } - ], - "title": "Rebalances", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 29 - }, - "id": 112, - "panels": [ + "timeFrom": null, + "timeShift": null, + "title": "Batch Size (Max.)", + "transformations": [], + 
"transparent": false, + "type": "timeseries" + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average size of the batches processed by the connector", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3436,7 +1747,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3447,78 +1758,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "decbytes" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 30 + "y": 7 }, - "id": 113, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_batch_size_avg{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": 
"kafka_connect_connector_task_metrics_offset_commit_success_percentage{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Batch Size Average", + "title": "Offset commit success %", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Maximum size of the batches processed by the connector", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3529,7 +1847,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3540,78 +1858,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "decbytes" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 30 + "h": 10, + "w": 8, + "x": 8, + "y": 7 }, - "id": 114, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - 
"displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_batch_size_max{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Batch Size Max", + "title": "Offset commit avg. 
latency", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Average percentage of the task’s offset commit attempts that succeeded", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3622,7 +1989,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3633,80 +2000,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percentunit" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 37 + "y": 8 }, - "id": 115, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_offset_commit_success_percentage{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_failures{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Offset commit success percentage", + "title": "Total Record Failures", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to commit offsets", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3717,7 +2089,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3728,78 +2100,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": 
"off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 37 + "h": 10, + "w": 8, + "x": 8, + "y": 8 }, - "id": 116, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_record_errors{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Offset commit Average Time", + "title": "Total Record Error", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The fraction of time this task has spent in the running state.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3810,7 +2189,7 @@ "axisPlacement": 
"auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3821,96 +2200,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percentunit" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 44 + "h": 10, + "w": 8, + "x": 16, + "y": 8 }, - "id": 117, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_connector_task_metrics_running_ratio{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_records_skipped{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" 
} ], "timeFrom": null, "timeShift": null, - "title": "Running ratio", + "title": "Total Records Skipped", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Task metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 30 - }, - "id": 201, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of failures seen by task", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3921,7 +2289,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -3932,80 +2300,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 31 + "y": 9 }, - "id": 203, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_record_failures{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_errors_logged{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record failures", + "title": "Total Errors Logged", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of errors seen by task", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4016,7 +2389,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4027,80 +2400,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 31 + "y": 9 }, - "id": 205, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, 
+ "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_record_errors{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_total_retries{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record errors", + "title": "Total Retries", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of records skipped by task", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4111,7 +2489,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4122,80 +2500,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": 
"absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 16, - "y": 31 + "y": 9 }, - "id": 206, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_records_skipped{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total record skipped", + "title": "Dead Letter Topic Requests", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Task Errors", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of messages that was logged into either the dead letter queue or with Log4j", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4206,7 +2631,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4217,80 +2642,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 38 + "y": 10 }, - "id": 208, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_errors_logged{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total errors logged", + "title": "Poll Batch Avg. Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Total number of retries made by task", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4301,7 +2731,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4312,80 +2742,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 38 + "y": 10 }, - "id": 207, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_total_retries{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total retries", + "title": "Poll Batch Max. 
Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of produce requests to the dead letter queue", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4396,7 +2831,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4407,80 +2842,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 45 + "y": 11 }, - "id": 202, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": 
"kafka_connect_source_task_metrics_source_record_poll_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Dead letter queue Produce requests", + "title": "Source Record Poll Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of produce requests to the dead letter queue", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4491,7 +2931,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4502,95 +2942,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "none" + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 45 + "y": 11 }, - "id": 204, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - 
"displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_task_error_metrics_deadletterqueue_produce_requests{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\"}", + "datasource": null, + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Dead letter queue Produce requests", + "title": "Source Record Write Rate", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Task Errors metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Source Tasks", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 12 }, - "id": 139, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to poll for a batch 
of source records", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4601,7 +3073,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4612,79 +3084,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 32 + "y": 12 }, - "id": 140, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_poll_batch_avg_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, 
+ "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Average time", + "title": "Put Batch Avg. Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum time in milliseconds taken by this task to poll for a batch of source records", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4695,7 +3173,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4706,80 +3184,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 32 + "y": 12 }, - "id": 141, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{job=\"kafka-connect\", 
env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Max time", + "title": "Put Batch Max. Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of records produced/polled (before transformation) by this task belonging to the named source connector in this worker.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4790,7 +3273,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4801,79 +3284,127 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ops" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 39 + "y": 13 }, - "id": 144, + "height": null, + "hideTimeOverride": 
false, + "id": 36, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_partition_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}]", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Poll rate", + "title": "Partition Count", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Sink Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + 
"panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of records output from the transformations and written to Kafka for this task belonging to the named source connector in this worker. This is after transformations are applied and excludes any records filtered out by the transformations.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4884,7 +3415,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4895,79 +3426,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 39 + "x": 0, + "y": 5 }, - "id": 143, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_write_rate{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": 
null, + "expr": "kafka_connect_connect_metrics_incoming_byte_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Write rate", + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records that have been produced by this task but not yet completely written to Kafka.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -4978,7 +3515,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -4989,79 +3526,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 46 + "x": 8, + "y": 5 }, - "id": 142, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - 
"displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_active_count_avg{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_outgoing_byte_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Active Count average", + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum number of records that have been produced by this task but not yet completely written to Kafka.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5072,7 +3615,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5083,94 +3626,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, 
- { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 46 + "x": 0, + "y": 6 }, - "id": 145, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_source_task_metrics_source_record_active_count_max{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_io_ratio{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Source Record Active Count max", + "title": "IO Ratio", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Source metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 32 - }, - "id": 134, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of topic partitions assigned to this task belonging to the named sink connector in this worker.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { "color": { @@ -5181,7 +3715,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5192,76 +3726,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 0, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 33 + "x": 8, + "y": 6 }, - "id": 135, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_partition_count{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_network_io_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" 
} ], "timeFrom": null, "timeShift": null, - "title": "Partition Count", + "title": "Network IO Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in milliseconds taken by this task to put a batch of sinks records", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5272,7 +3815,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5283,76 +3826,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 33 + "x": 0, + "y": 7 }, - "id": 136, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": 
"kafka_connect_connect_metrics_connection_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Average time", + "title": "Active Connections", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The maximum time in milliseconds taken by this task to put a batch of sinks records", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -5363,7 +3915,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -5374,221 +3926,235 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 16, - "y": 33 + "x": 8, + "y": 7 }, - "id": 137, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": 
"bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{job=\"kafka-connect\", env=\"$env\",instance=~\"$instance\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",connector=~\"$connector\",task!=\"\"}", + "datasource": null, + "expr": "kafka_connect_connect_metrics_successful_authentication_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}} (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connect_metrics_failed_authentication_total{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "intervalFactor": 1, - "legendFormat": "{{connector}}-{{task}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Max time", + "title": "Authentications", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Sink metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connect Workers", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka-connect" + ], "templating": { "list": [ { - "allValue": ".+", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", 
"current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": ".+", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_connect_cluster_id)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "kafka_connect_cluster_id", + "includeAll": false, + "label": "Connect cluster", + "multi": false, + "name": "connect_cluster", "options": [], - "query": { - "query": "label_values(kafka_connect_cluster_id)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\"}, kafka_connect_cluster_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "All", - "value": "$__all" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": 
"label_values(kafka_connect_app_info{job=\"kafka-connect\", env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"},instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", - "multi": false, - "name": "instance", + "label": "Connect worker", + "multi": true, + "name": "connect_worker", "options": [], - "query": { - "query": "label_values(kafka_connect_app_info{job=\"kafka-connect\", env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"},instance)", - "refId": "Prometheus-instance-Variable-Query" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_connect_connector_task_metrics_pause_ratio{job=\"kafka-connect\", env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"},connector)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Connector name", + "label": "Connector", "multi": true, "name": "connector", "options": [], - "query": { - "query": "label_values(kafka_connect_connector_task_metrics_pause_ratio{job=\"kafka-connect\", env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\"},connector)", - "refId": "Prometheus-connector-Variable-Query" - }, + "query": "label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\"}, 
connector)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -5611,8 +4177,8 @@ "30d" ] }, - "timezone": "", - "title": "Kafka Connect cluster", - "uid": "AEaSQ97mz", - "version": 1 -} \ No newline at end of file + "timezone": "browser", + "title": "Kafka Connect cluster - v2", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json index 3763e310..3aef1c22 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json @@ -1,611 +1,3236 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - "description": "Kafka Consumer Java client metrics", + "description": "Overview of the Kafka consumers", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 10, - "iteration": 1635962856628, + "hideControls": false, + "id": null, "links": [], "panels": [ { - "datasource": null, - "description": "", - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 58, - "options": { - 
"content": "# Disclaimer\n\n⚠️ This dashboard has some sample thresholds, this example is not meant to fit all use cases nor is it meant for production. Think of it as a learning tool to help you become comfortable with the metrics and thresholding.\n", - "mode": "markdown" - }, - "pluginVersion": "8.1.3", - "timeFrom": null, - "timeShift": null, - "type": "text" - }, - { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 3 + "y": 0 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Key metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { - "datasource": null, - "description": "The number of commit calls per second .", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", - "value": null - }, - { - "color": "green", - "value": 0.01 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 6, + "h": 5, + "w": 4, "x": 0, - "y": 4 + "y": 0 }, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", 
"justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_coordinator_metrics_commit_rate{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit rate", + "title": "Record Consumed Rate", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in ms a request was throttled by a broker.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "ms" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 6, - "x": 6, - "y": 4 + "h": 5, + "w": 4, + "x": 4, + "y": 0 }, - "id": 10, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", 
"orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Average fetch throttle time", + "title": "Records Lag", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Rate of failed authentication attempts\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "green", - "value": 0 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" + }, + { + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 6, - "x": 12, - "y": 4 + "h": 5, + "w": 4, + "x": 8, + "y": 0 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_metrics_failed_authentication_rate{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Failed auth rate", + "title": "Rebalance Rate per hour", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": null, - "description": "The number of total rebalance events per hour, both successful and unsuccessful rebalance attempts.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "green", - "value": 0 + "color": "red", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 6, - "x": 18, - "y": 4 + "h": 5, + "w": 4, + "x": 12, 
+ "y": 0 }, - "id": 40, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"} + kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rebalance rate per hour", + "title": "Failed Rebalance Rate per hour", + "transformations": [], + "transparent": false, "type": "stat" }, { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 8 - }, - "id": 16, - "panels": [], - "title": "System", - "type": "row" - }, - { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Cores", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": 
"none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "percent" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, - "x": 0, - "y": 9 + "h": 5, + "w": 4, + "x": 16, + "y": 0 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$hostname\"}[5m])", + "datasource": null, + "expr": "count(kafka_consumer_app_info{env=\"$env\", client_id=~\"$client_id\", version!=\"\", hostname=~\"$server\"}) by (version)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, 
"timeShift": null, - "title": "CPU Usage", - "type": "timeseries" + "title": "Versions", + "transformations": [], + "transparent": false, + "type": "stat" }, { - "datasource": "Prometheus", + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "Memory", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 9 - }, - "id": 64, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + "h": 1, + "w": 24, + "x": 0, + "y": 1 }, - "pluginVersion": "8.1.3", - "targets": [ + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "exemplar": true, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$hostname\"})", - "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Bytes Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { - "expr": "jvm_memory_bytes_max{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\",area=\"heap\"}", - "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM Memory Used", - "type": "timeseries" - }, - { - "datasource": 
"Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "% time in GC", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 2, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 2, - "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cts" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + 
"targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Consumed Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records Lag Max", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": 
"" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + 
"target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Size", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 
2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + 
}, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 
100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + 
}, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Commit Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Join Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": 
[] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Sync Time", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + 
"gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Heartbeat Response Time (Max.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + 
"maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Last Heartbeat Seconds Ago", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + 
"mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (failed)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Rate Per Hour", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rebalance Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": 
"absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Assigned Partitions", + "transformations": [], + "transparent": false, + "type": "timeseries" + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Consumer group", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Count", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + 
"thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + 
"x": 0, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + 
"mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "IO wait ratio", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] }, - { - "color": "red", - "value": 80 - } - ] + "unit": "cps" + }, + "overrides": [] }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 9 - }, - "id": 66, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + 
"calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Select Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$hostname\"}[5m]))", - "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "JVM GC time", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 24, - "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes consumed per second\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -616,7 +3241,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -627,51 +3252,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, 
"thresholdsStyle": { "mode": "off" } }, - "decimals": 1, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ns" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 6 + "y": 10 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -680,25 +3299,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_total{topic=~\"$topic\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes consumed rate", + "title": "IO time avg.", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records consumed per second.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -709,7 +3341,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { 
"legend": false, @@ -720,50 +3352,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ns" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 + "h": 10, + "w": 8, + "x": 8, + "y": 10 }, - "id": 9, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -772,25 +3399,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_consumer_consumer_fetch_manager_metrics_records_consumed_total{topic=~\"$topic\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rate of records consumed", + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + 
"timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes consumed per topic per second.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -801,7 +3483,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -812,51 +3494,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 1, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 14 + "y": 11 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ 
-865,25 +3541,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_total{topic!=\"\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval])) by (topic)", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes consumed rate per topic", + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records consumed per second per topic.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -894,7 +3583,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -905,50 +3594,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, 
- "w": 12, - "x": 12, - "y": 14 + "h": 10, + "w": 8, + "x": 8, + "y": 11 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -957,40 +3641,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum by (topic) (rate(kafka_consumer_consumer_fetch_manager_metrics_records_consumed_total{topic!=\"\", client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval]))", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Rate of records consumed per topic", + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Throughput", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 30, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The number of fetch requests per second.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1001,7 +3683,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1012,50 +3694,45 @@ 
"lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 7 + "x": 16, + "y": 11 }, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1064,25 +3741,52 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_consumer_consumer_fetch_manager_metrics_fetch_total{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + 
"legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch request rate", + "title": "Request Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The average number of bytes fetched per request for a topic", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1093,7 +3797,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1104,50 +3808,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" + "steps": [] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 7 + "x": 0, + "y": 12 }, - "id": 34, + "height": null, + "hideTimeOverride": false, + "id": 41, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1156,24 +3855,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{topic=~\"$topic\", client_id=~\"$client_id\", 
env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}} - {{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch size avg", + "title": "Request Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The average time taken for a fetch request.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1184,7 +3897,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1195,50 +3908,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ms" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 16, - "y": 7 + "x": 8, + "y": 12 }, - "id": 32, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], 
"displayMode": "table", "placement": "bottom" @@ -1247,39 +3955,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch latency average", + "title": "Response Rate", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Consumer Fetch Metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 19 + "y": 13 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 43, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": null, - "description": "The average time taken for a commit request", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1290,7 +4039,7 @@ "axisPlacement": "auto", 
"barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1301,14 +4050,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -1316,34 +4063,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 6 + "y": 11 }, - "id": 36, + "height": null, + "hideTimeOverride": false, + "id": 44, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1352,24 +4097,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit latency average", + "title": "Bytes Consumed Rate per Topic", + 
"transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The number of commit calls per second .", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1380,7 +4139,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1391,50 +4150,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 6 + "h": 10, + "w": 8, + "x": 8, + "y": 11 }, - "id": 43, + "height": null, + "hideTimeOverride": false, + "id": 45, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1443,39 +4197,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_coordinator_metrics_commit_rate{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": 
false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit rate", + "title": "Records Consumed Rate per Topic", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Consumer Commit Metrics", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 26, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of simultaneous connections\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1486,7 +4239,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1497,50 +4250,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 48 + "y": 12 }, - "id": 11, + "height": null, + "hideTimeOverride": false, + "id": 46, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1549,25 +4297,52 
@@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_metrics_connection_count{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Current connection count", + "title": "Fetch Size per Topic", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "response rate per node\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1578,7 +4353,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1589,50 +4364,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "normal" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, 
"thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 48 + "h": 10, + "w": 8, + "x": 8, + "y": 11 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 47, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1641,207 +4411,146 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_consumer_consumer_node_metrics_response_rate{client_id=~\"$client_id\", env=\"$env\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Node reponse rate", + "title": "Records per Request Avg. 
per Topic", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Connections", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "10s", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka-client", + "kafka-consumer" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": "Prometheus", - "definition": "label_values(kafka_consumer_app_info, client_id)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Client ID", - "multi": true, - "name": "client_id", - "options": [], - "query": { - "query": "label_values(kafka_consumer_app_info, client_id)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, + "includeAll": false, "label": "Environment", - "multi": true, + "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + 
"tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_consumergroup_group_lag, group)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Group ID", + "label": "Server", "multi": true, - "name": "consumer_group", + "name": "server", "options": [], - "query": { - "query": "label_values(kafka_consumergroup_group_lag, group)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\"},hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(topic)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "topic", + "label": "Client ID", "multi": true, - "name": "topic", + "name": "client_id", "options": [], - "query": { - "query": "label_values(topic)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\"},client_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false - }, - { - "allValue": 
null, - "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] - }, - "datasource": null, - "definition": "label_values(kafka_consumer_app_info, hostname)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Hostname", - "multi": true, - "name": "hostname", - "options": [], - "query": { - "query": "label_values(kafka_consumer_app_info, hostname)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" } ] }, "time": { - "from": "now-15m", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -1853,10 +4562,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", - "title": "Kafka Consumer", - "uid": "-C-IEldWk2", - "version": 1 -} \ No newline at end of file + "timezone": "browser", + "title": "Kafka Consumer - v2", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json index 57a3610f..17359359 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json @@ -1,268 +1,360 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, - 
"description": "Java client Kafka Producer metrics", + "description": "Overview of the Kafka producers", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 5, - "iteration": 1635958303882, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 60, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Key metrics", - "type": "row" - }, - { - "datasource": null, - "description": "", - "gridPos": { - "h": 3, - "w": 24, - "x": 0, - "y": 1 - }, - "id": 58, - "options": { - "content": "# Disclaimer\n\n⚠️ This dashboard has some sample thresholds, this example is not meant to fit all use cases nor is it meant for production. Think of it as a learning tool to help you become comfortable with the metrics and thresholding.\n", - "mode": "markdown" - }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "type": "text" + "title": "Overview", + "transformations": [], + "transparent": false, + "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of retried record sends for a topic. An increase could signal connectivity problems from the application to the broker. 
", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 5, - "w": 5, + "w": 4, "x": 0, - "y": 4 + "y": 0 }, - "id": 9, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", "hide": false, "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Retry rate", + "title": "Record Send Rate", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average per-second number of record sends that resulted in errors.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": 
{ "defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "green", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { "color": "red", - "value": 1 + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 5, - "w": 5, - "x": 5, - "y": 4 + "w": 4, + "x": 4, + "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_record_error_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Record error rate", + "title": "Error Rate", + "transformations": [], + "transparent": false, "type": "stat" }, { - "datasource": null, - "description": "he total amount of buffer memory that is not being used (either unallocated or in the free list).", + "cacheTimeout": null, + "datasource": "Prometheus", + 
"description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "red", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#EAB839", - "value": 5 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "green", - "value": 10 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] }, - "unit": "decbytes" + "unit": "none" }, "overrides": [] }, "gridPos": { "h": 5, "w": 4, - "x": 10, - "y": 4 + "x": 8, + "y": 0 }, - "id": 56, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_buffer_available_bytes{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Free buffer space", + "title": "Retry Rate", + "transformations": [], + "transparent": false, 
"type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average time in ms a request was throttled by a broker.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -272,406 +364,1119 @@ }, "gridPos": { "h": 5, - "w": 5, - "x": 14, - "y": 4 + "w": 4, + "x": 12, + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "background", + "colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_produce_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "count(kafka_producer_app_info{env=\"$env\", client_id=~\"$client_id\", version!=\"\", hostname=~\"$server\"}) by (version)", "format": "time_series", "hide": false, "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{version}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce request throttle average", + "title": "Versions", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, + "collapsed": true, 
"datasource": null, - "description": "The average compression rate of record batches for a topic, defined as the average ratio of the compressed batch size over the uncompressed size.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 5, - "x": 19, - "y": 4 - }, - "id": 54, - "options": { - "colorMode": "background", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "mean" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_compression_rate_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", - "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "steps": [] + } } - ], - "timeFrom": null, - "timeShift": null, - "title": "Compression rate", - "type": "stat" - }, - { - "datasource": "Prometheus", - "description": "The average request latency in ms.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", 
- "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" - }, - "overrides": [] }, "gridPos": { - "h": 8, - "w": 8, + "h": 1, + "w": 24, "x": 0, - "y": 9 - }, - "id": 10, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + "y": 1 }, - "pluginVersion": "8.1.3", - "targets": [ + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", - "hide": false, - "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Produce request latency", - "type": "timeseries" - }, - { - "datasource": null, - "description": "The average time in ms record batches spent in the send buffer.", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] 
+ "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 8, - "y": 9 - }, - "id": 52, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Incoming Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ { - "exemplar": true, - "expr": 
"kafka_producer_producer_metrics_record_queue_time_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", - "format": "time_series", - "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Record queue time", - "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "The rate of failed authentication per seconds\n", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "binBps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, 
+ "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 1 + }, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Metadata Age", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + 
"format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + 
} + ], + "timeFrom": null, + "timeShift": null, + "title": "Request in-flight", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Records per Request (avg.)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + 
"datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Send Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + 
"drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Record Error Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, 
+ "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 8, - "x": 16, - "y": 9 - }, - "id": 32, - "options": { - "legend": { - "calcs": [ - "lastNotNull", - "max", - "mean" + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } ], - "displayMode": "table", - "placement": "bottom" + "timeFrom": null, + "timeShift": null, + "title": "Record Size", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - 
"targets": [ - { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_failed_authentication_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", - "hide": false, - "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Failed authentication rate", - "type": "timeseries" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 17 - }, - "id": 64, - "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Cores", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -682,50 +1487,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percent" + "steps": [] + }, + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 0, - "y": 18 + "x": 8, + "y": 4 }, - "id": 62, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -734,34 +1534,63 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + 
"repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$hostname\"}[5m])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "Record Queue Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Memory", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -769,53 +1598,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": 
"absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 8, - "y": 18 + "x": 16, + "y": 4 }, - "id": 66, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -824,40 +1648,63 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$hostname\"})", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" }, { - "expr": "jvm_memory_bytes_max{job=\"kafka-connect\",env=\"$env\",kafka_connect_cluster_id=~\"$kafka_connect_cluster_id\",instance=~\"$instance\",area=\"heap\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Produce Throttle Time", + 
"transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "% time in GC", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -865,55 +1712,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 2, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "percentunit" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 16, - "y": 18 + "x": 0, + "y": 5 }, - "id": 68, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -922,50 +1762,63 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$hostname\"}[5m]))", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": 
false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{job}}@{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM GC time", + "title": "Batch Size", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "System", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 18 - }, - "id": 25, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes sent per second to the broker.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Bytes/s", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -976,50 +1829,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" + "steps": [] + }, + "unit": "cps" 
}, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 19 + "h": 10, + "w": 8, + "x": 8, + "y": 5 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1028,35 +1876,49 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_outgoing_byte_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outgoing byte rate", + "title": "Batch Split Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes sent per second to the broker per topic.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Bytes/s", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1067,50 +1929,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - 
"mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" + "steps": [] + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 19 + "h": 10, + "w": 8, + "x": 16, + "y": 5 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1119,36 +1976,91 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(kafka_producer_producer_topic_metrics_byte_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\",topic=~\"$topic\"}[$__rate_interval])) by (topic)", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outgoing byte rate per topic", + "title": "Compression Rate (avg.)", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Performance", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": 
null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of messages sent per second to the broker.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Messages/s", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1159,50 +2071,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, + "h": 10, + "w": 8, "x": 0, - "y": 27 + "y": 6 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1211,36 +2118,49 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": 
null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_record_send_total{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outgoing messages per second", + "title": "Connection Count", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of messages sent per second to the broker per topic.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { - "axisLabel": "Messages/s", + "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1251,50 +2171,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 27 + "h": 10, + "w": 8, + "x": 8, + "y": 6 }, - "id": 5, + "height": null, + "hideTimeOverride": false, + 
"id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1303,40 +2218,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(rate(kafka_producer_producer_topic_metrics_record_send_total{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}[$__rate_interval])) by (topic)", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{topic}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Outgoing messages per second per topic", + "title": "Connection Creation Rate", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Throughput", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 19 - }, - "id": 27, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of bytes sent per partition per-request.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1347,7 +2260,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1358,50 +2271,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { 
- "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "bytes" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 0, - "y": 20 + "x": 16, + "y": 6 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1410,25 +2318,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_batch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Batch size average", + "title": "Connection Close Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The current number of in-flight requests awaiting a response.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1439,7 +2360,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + 
"fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1450,50 +2371,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 8, - "y": 20 + "x": 0, + "y": 7 }, - "id": 16, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1502,25 +2418,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_requests_in_flight{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Request in flight", + "title": "IO ratio", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average record size", + "description": null, + "editable": 
true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1531,7 +2460,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1542,50 +2471,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "decbytes" + "steps": [] + }, + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, - "x": 16, - "y": 20 + "x": 8, + "y": 7 }, - "id": 15, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1594,40 +2518,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_record_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Record size average", + "title": "IO wait ratio", + 
"transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Performance", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 20 - }, - "id": 23, - "panels": [ + }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of requests sent per second to the broker.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1638,7 +2560,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1649,50 +2571,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 0, - "y": 21 + "x": 16, + "y": 7 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1701,25 +2618,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_request_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": 
"topk(10,kafka_producer_producer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce request rate", + "title": "Select Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of records per request.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1730,7 +2660,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1741,50 +2671,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "Bps" + "steps": [] + }, + "unit": "ns" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 8, - "y": 21 + "x": 0, + "y": 8 }, - "id": 11, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1793,25 +2718,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + 
"span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_request_size_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce request size average", + "title": "IO time avg.", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "The average number of response received per second to the broker.\n", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1822,7 +2760,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1833,50 +2771,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ns" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 16, - "y": 21 + "x": 8, + "y": 8 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { 
"legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1885,40 +2818,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_response_total{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce response rate", + "title": "IO wait time avg.", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Produce Request metrics", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 21 + "y": 8 }, - "id": 35, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { - "datasource": null, - "description": "The current number of active connections.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1929,7 +2902,7 @@ 
"axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1940,50 +2913,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 22 + "y": 8 }, - "id": 37, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -1992,24 +2960,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "rate(kafka_producer_producer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}[$__rate_interval])", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connection rate", + "title": "Incoming Byte Rate", + 
"transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "New connections established per second in the window.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2020,7 +3002,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2031,50 +3013,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 22 + "y": 8 }, - "id": 38, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2083,24 +3060,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": 
"", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connection creation rate", + "title": "Outgoing Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Connections closed per second in the window.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2111,7 +3102,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2122,50 +3113,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 22 + "y": 8 }, - "id": 39, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2174,24 +3160,52 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": 
"kafka_producer_producer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}} (max.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Connection close rate", + "title": "Request Latency", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The fraction of time the I/O thread spent doing I/O.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2202,7 +3216,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2213,50 +3227,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - 
"steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 30 + "y": 9 }, - "id": 40, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2265,24 +3274,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IO Ratio", + "title": "Request Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The average length of time the I/O thread spent waiting for a socket ready for reads or writes in nanoseconds.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2293,7 +3316,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2304,50 +3327,45 @@ "lineWidth": 1, 
"pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ns" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 30 + "y": 9 }, - "id": 43, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2356,24 +3374,80 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} <- {{node_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IO Wait time average", + "title": "Response Rate", + "transformations": [], + "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Broker", + "transformations": [], 
+ "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "datasource": null, - "description": "The average length of time for I/O per select call in nanoseconds.", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2384,7 +3458,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2395,50 +3469,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "ns" + "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 16, - "y": 30 + "x": 0, + "y": 10 }, - "id": 41, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2447,24 +3516,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": 
null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IO Time average", + "title": "Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "The fraction of time the I/O thread spent waiting.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2475,7 +3558,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2486,50 +3569,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 0, - "y": 38 + "x": 8, + "y": 10 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 39, + "interval": 
null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2538,24 +3616,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "IO Wait Ratio", + "title": "Compression Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Number of times the I/O layer checked for new I/O to perform per second.\n", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2566,7 +3658,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2577,50 +3669,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - 
"color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, - "x": 8, - "y": 38 + "x": 16, + "y": 10 }, - "id": 44, + "height": null, + "hideTimeOverride": false, + "id": 40, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2629,103 +3716,138 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Select Rate", + "title": "Record Send Rate", + "transformations": [], + "transparent": false, "type": "timeseries" - } - ], - "title": "Connections", - "type": "row" - }, - { - "collapsed": true, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 22 - }, - "id": 31, - "panels": [ + }, { "cacheTimeout": null, - "datasource": null, - "description": "", + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + 
"fillOpacity": 0, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } }, - "mappings": [] + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 23 + "y": 11 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 41, "interval": null, "links": [], - "maxDataPoints": 3, + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "displayMode": "list", - "placement": "bottom" - }, - "pieType": "pie", - "reduceOptions": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "6.3.0", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(kafka_producer_app_info{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\",version!=\"\"}) by (version)", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{version}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> {{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "API Version", - "type": "piechart" + "title": "Record Retry Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - 
"description": "The age in seconds of the current producer metadata being used.", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2736,7 +3858,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -2747,50 +3869,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "short" + "steps": [] + }, + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 16, + "h": 10, + "w": 8, "x": 8, - "y": 23 + "y": 11 }, - "id": 17, + "height": null, + "hideTimeOverride": false, + "id": 42, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -2799,173 +3916,146 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_producer_producer_metrics_metadata_age{env=\"$env\",client_id=~\"$client_id\",hostname=~\"$hostname\"}", + "datasource": null, + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "format": "time_series", "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{client_id}}@{{hostname}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{client_id}}@{{hostname}} -> 
{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Metadata max age", + "title": "Record Error Rate", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "Misc", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Per Topic", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "10s", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka-client", + "kafka-producer" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_producer_app_info, client_id)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, - "label": "Client ID", - "multi": true, - "name": "client_id", + "includeAll": false, + "label": "Environment", + "multi": false, + "name": "env", "options": [], - "query": { - "query": "label_values(kafka_producer_app_info, client_id)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_producer_producer_topic_metrics_record_send_total, topic)", - 
"description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Kafka topic", + "label": "Server", "multi": true, - "name": "topic", + "name": "server", "options": [], - "query": { - "query": "label_values(kafka_producer_producer_topic_metrics_record_send_total, topic)", - "refId": "StandardVariableQuery" - }, + "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\"},hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": [ - "dev" - ], - "value": [ - "dev" - ] + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Environment", + "label": "Client ID", "multi": true, - "name": "env", + "name": "client_id", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\"},client_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false - }, - { - "allValue": null, - "current": { - "selected": false, - "text": [ - "kafka1" - ], - "value": [ - "kafka1" - ] - }, - "datasource": null, - "definition": "label_values(kafka_producer_app_info, hostname)", - "description": null, - "error": null, - "hide": 0, - "includeAll": true, - "label": "Hostname", - "multi": true, - "name": "hostname", - "options": [], - "query": { - "query": 
"label_values(kafka_producer_app_info, hostname)", - "refId": "StandardVariableQuery" - }, - "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "type": "query" } ] }, "time": { - "from": "now-15m", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ "5s", "10s", @@ -2977,10 +4067,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", - "title": "Kafka Producer", - "uid": "-C-IEldWk", - "version": 1 -} \ No newline at end of file + "timezone": "browser", + "title": "Kafka Producer - v2", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json index 74b7655f..189bcad9 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json @@ -1,33 +1,30 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Kafka quotas", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 3, - "iteration": 1647426501739, + "hideControls": false, + "id": null, "links": [], "panels": [ { - "datasource": null, - "description": "Indicates the data produce rate of the client in bytes/sec", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": 
null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -38,7 +35,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -49,34 +46,22 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "Bps" + "unit": "binBps" }, "overrides": [] }, @@ -86,13 +71,20 @@ "x": 0, "y": 0 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -101,24 +93,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, kafka_server_produce_byte_rate{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"})", + "datasource": null, + "expr": "topk(10,kafka_server_produce_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], 
"timeFrom": null, "timeShift": null, "title": "Produce Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the data consume rate of the client in bytes/sec", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -129,7 +135,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -140,34 +146,22 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "Bps" + "unit": "binBps" }, "overrides": [] }, @@ -177,13 +171,20 @@ "x": 8, "y": 0 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -192,24 +193,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, kafka_server_fetch_byte_rate{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"})", + "datasource": null, + "expr": "topk(10,kafka_server_fetch_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, 
+ "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Fetch Byte Rate", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the percentage of time spent in broker network and I/O threads to process requests from client group", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -220,7 +235,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -231,32 +246,20 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "percent" }, @@ -268,13 +271,20 @@ "x": 16, "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -283,24 +293,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, 
kafka_server_request_request_time{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"})", + "datasource": null, + "expr": "topk(10,kafka_server_request_request_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Request Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the amount of time in ms the client was throttled", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -311,7 +335,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -322,32 +346,20 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, @@ -357,15 +369,22 @@ "h": 12, "w": 8, "x": 0, - "y": 12 + "y": 1 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { 
"legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -374,24 +393,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_server_produce_throttle_time{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"} > 0", + "datasource": null, + "expr": "topk(10,kafka_server_produce_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce Bandwidth Throttle", + "title": "Produce Throttle Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the amount of time in ms the client was throttled", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -402,7 +435,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -413,32 +446,20 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": 
"green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, @@ -448,15 +469,22 @@ "h": 12, "w": 8, "x": 8, - "y": 12 + "y": 1 }, + "height": null, + "hideTimeOverride": false, "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -465,24 +493,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_server_fetch_throttle_time{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"} > 0", + "datasource": null, + "expr": "topk(10,kafka_server_fetch_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch Bandwidth Throttle", + "title": "Fetch Throttle Time", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "Indicates the amount of time in ms the client was throttled", + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -493,7 +535,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -504,32 +546,20 @@ "lineWidth": 1, "pointSize": 5, 
"scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, @@ -539,15 +569,22 @@ "h": 12, "w": 8, "x": 16, - "y": 12 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull", "max", - "mean" + "mean", + "last" ], "displayMode": "table", "placement": "bottom" @@ -556,168 +593,163 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "kafka_server_request_throttle_time{user=~\"$user\",client_id=~\"$client_id\",instance=~\"$broker_id\",env=\"$env\",job=\"kafka-broker\"} > 0", + "datasource": null, + "expr": "topk(10,kafka_server_request_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "User: {{ user }} - Client: {{ client_id }} @ Broker: {{ instance }}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "User:{{user}} | Client ID:{{client_id}} @ Broker:{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Request Throttle", + "title": "Request Throttle Time", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", 
- "tags": [], + "tags": [ + "confluent", + "kafka-client", + "kafka-quota" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_server_kafkaserver_brokerstate, instance)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Instance", + "label": "Broker", "multi": true, - "name": "broker_id", + "name": "broker", "options": [], - "query": { - "query": "label_values(kafka_server_kafkaserver_brokerstate, instance)", - "refId": "Prometheus-broker_id-Variable-Query" - }, + "query": "label_values(kafka_server_produce_byte_rate{env=\"$env\"},hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": ".*", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": 
[ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(user)", - "description": null, - "error": null, "hide": 0, "includeAll": true, "label": "User", "multi": true, "name": "user", "options": [], - "query": { - "query": "label_values(user)", - "refId": "Prometheus-user-Variable-Query" - }, + "query": "label_values(user)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { - "allValue": ".*", + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(client_id)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Client Id", + "label": "Client ID", "multi": true, "name": "client_id", "options": [], - "query": { - "query": "label_values(client_id)", - "refId": "Prometheus-client_id-Variable-Query" - }, + "query": "label_values(client_id)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -727,10 +759,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", - "title": "Kafka Quotas", - "uid": "cwWEgYqMz", - "version": 2 -} \ No newline at end of file + "timezone": "browser", + "title": "Kafka Quotas - v2", + "uid": null, + "version": 
0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json index 1fe01cf4..7354ae5c 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json @@ -1,109 +1,71 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Kafka topics", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 4, - "iteration": 1647426704713, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 0 - }, - "id": 19, - "panels": [], - "title": "Overview", - "type": "row" - }, - { - "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "orange", - "value": 1000 - }, - { - "color": "red", - "value": 10000 - } - ] + "steps": [] } - }, - "overrides": [] + } }, "gridPos": { - "h": 5, - "w": 4, + "h": 1, + "w": 24, "x": 0, - "y": 1 - }, - "id": 9, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - 
"values": false - }, - "text": {}, - "textMode": "auto" + "y": 0 }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_globaltopiccount{job=\"kafka-broker\",env=~\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Total # of Topics", - "type": "stat" + "title": "Throughput", + "transformations": [], + "transparent": false, + "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -114,7 +76,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -125,50 +87,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { "h": 10, - "w": 13, - "x": 4, - "y": 1 + "w": 12, + "x": 0, + "y": 0 }, + "height": null, + "hideTimeOverride": false, "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", 
"placement": "bottom" @@ -177,23 +136,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_messagesinpersec{job=\"kafka-broker\",topic=~\"$topic\",env=~\"$env\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Messages In", + "title": "Messages In/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -204,7 +178,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -215,32 +189,22 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "bytes" }, @@ -248,17 +212,24 @@ }, "gridPos": { "h": 10, - "w": 7, - "x": 17, - "y": 1 + "w": 12, + "x": 12, + "y": 0 }, - "id": 17, + "height": null, + "hideTimeOverride": false, + "id": 3, + "interval": null, + "links": [], + 
"maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -267,82 +238,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum(kafka_log_log_size{job=\"kafka-broker\",env=\"$env\",topic=~\"$topic\"}) by (topic))", + "datasource": null, + "expr": "topk(10, sum(kafka_log_log_size{env=\"$env\",topic=~\"$topic\"}) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Log size", + "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "fieldConfig": { - "defaults": { - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 10000 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 5, - "w": 4, - "x": 0, - "y": 6 - }, - "id": 11, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "auto", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum(kafka_controller_kafkacontroller_globalpartitioncount{job=\"kafka-broker\",env=~\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Total # of Partitions", - "type": "stat" - }, - { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { "color": { @@ -353,7 +280,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -364,13 +291,13 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -379,34 +306,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 12, "x": 0, - "y": 11 + "y": 1 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -415,23 +340,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_bytesinpersec{job=\"kafka-broker\",topic=~\"$topic\",env=~\"$env\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Bytes In", + "title": "Bytes In/Sec", + "transformations": [], + "transparent": false, "type": 
"timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -442,7 +382,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -453,13 +393,13 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" @@ -468,34 +408,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "binBps" }, "overrides": [] }, "gridPos": { - "h": 9, + "h": 10, "w": 12, "x": 12, - "y": 11 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -504,24 +442,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum without(instance) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{job=\"kafka-broker\",topic=~\"$topic\",env=~\"$env\"}[5m])))", + "datasource": null, + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" 
} ], "timeFrom": null, "timeShift": null, - "title": "Bytes Out", + "title": "Bytes Out/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -532,7 +484,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -543,50 +495,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 0, - "y": 20 + "y": 2 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -595,24 +544,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{job=\"kafka-broker\", env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "datasource": null, + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + 
"instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Produce Request per sec", + "title": "Produce Requests/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -623,7 +586,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -634,50 +597,47 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, + "showPoints": "auto", + "spanNulls": false, "stacking": { - "group": "A", - "mode": "none" + "mode": "normal" }, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "reqps" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 12, - "y": 20 + "y": 2 }, - "id": 15, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -686,139 +646,127 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{job=\"kafka-broker\", env=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", + "datasource": null, + 
"expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{topic}}", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch Request per sec", + "title": "Consumer Fetch Requests/Sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 28 + "y": 3 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { - "align": null, + "align": "auto", "displayMode": "auto", - "filterable": false + "filterable": true }, - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "partition" - }, - "properties": [ - { - "id": "custom.width", - "value": 103 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "offset" - }, - "properties": [ - { - "id": "custom.width", - "value": 137 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "instance" - }, - "properties": [ - { - "id": "custom.width", - "value": 155 - } - ] - }, - { - "matcher": { - "id": "byName", - 
"options": "topic" - }, - "properties": [ - { - "id": "custom.width", - "value": 294 - } - ] - } - ] + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 0, - "y": 29 + "y": 3 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "showHeader": true, - "sortBy": [ - { - "desc": false, - "displayName": "partition" - } - ] + "showHeader": true }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 6, "targets": [ { - "exemplar": true, - "expr": "kafka_log_log_logstartoffset{job=\"kafka-broker\",env=~\"$env\",topic=\"$topic\"}", + "datasource": null, + "expr": "kafka_log_log_logstartoffset{env=\"$env\",topic=~\"$topic\"}", "format": "table", + "hide": false, "instant": true, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Start Offset", + "title": "Start Offsets", "transformations": [ { "id": "organize", @@ -826,123 +774,132 @@ "excludeByName": { "Time": true, "__name__": true, - "env": true, - "instance": false, - "job": true + "app": true, + "clusterId": true, + "confluentPlatform": true, + "confluent_platform": true, + "controller_revision_hash": true, + "instance": true, + "job": true, + "namespace": true, + "platform_confluent_io_type": true, + "statefulset_kubernetes_io_pod_name": true, + "type": true }, "indexByName": { - "Time": 0, - "Value": 7, - "__name__": 1, - "env": 2, - "instance": 3, - "job": 4, - "partition": 6, - "topic": 5 + "Value": 4, + "partition": 3, + "pod": 1, + "topic": 2 }, "renameByName": { "Value": "offset" } } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "partition" + } + ], + 
"fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition" + } + ] + } } ], + "transparent": false, "type": "table" }, { + "cacheTimeout": null, + "color": { + "mode": "thresholds" + }, + "columns": [], "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "custom": { - "align": null, + "align": "auto", "displayMode": "auto", - "filterable": false + "filterable": true }, - "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] } }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "partition" - }, - "properties": [ - { - "id": "custom.width", - "value": 103 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "offset" - }, - "properties": [ - { - "id": "custom.width", - "value": 105 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "topic" - }, - "properties": [ - { - "id": "custom.width", - "value": 289 - } - ] - } - ] + "overrides": [] }, + "fontSize": "100%", "gridPos": { - "h": 8, + "h": 10, "w": 12, "x": 12, - "y": 29 + "y": 3 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "mappings": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "showHeader": true, - "sortBy": [ - { - "desc": false, - "displayName": "partition" - } - ] + "showHeader": true }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 6, "targets": [ { - "exemplar": true, - "expr": "kafka_log_log_logendoffset{job=\"kafka-broker\",env=~\"$env\",topic=\"$topic\"}", + "datasource": null, + "expr": "kafka_log_log_logendoffset{env=\"$env\",topic=~\"$topic\"}", "format": "table", + 
"hide": false, "instant": true, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{topic}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "End Offset", + "title": "End Offsets", "transformations": [ { "id": "organize", @@ -950,112 +907,179 @@ "excludeByName": { "Time": true, "__name__": true, - "env": true, - "instance": false, - "job": true + "app": true, + "clusterId": true, + "confluentPlatform": true, + "confluent_platform": true, + "controller_revision_hash": true, + "instance": true, + "job": true, + "namespace": true, + "platform_confluent_io_type": true, + "statefulset_kubernetes_io_pod_name": true, + "type": true }, "indexByName": { - "Time": 0, - "Value": 7, - "__name__": 1, - "env": 2, - "instance": 3, - "job": 4, - "partition": 6, - "topic": 5 + "Value": 4, + "partition": 3, + "pod": 1, + "topic": 2 }, "renameByName": { "Value": "offset" } } + }, + { + "id": "convertFieldType", + "options": { + "conversions": [ + { + "destinationType": "number", + "targetField": "partition" + } + ], + "fields": {} + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic" + } + ] + } + }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition" + } + ] + } } ], + "transparent": false, "type": "table" } ], - "title": "Topic offsets", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Offsets", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - 
"value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" - }, + "query": "label_values(env)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { - "selected": true, - "text": [ - "All" - ], - "value": [ - "$__all" - ] + "selected": false, + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(kafka_log_log_size,topic)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": "Topic name", + "label": "Topic", "multi": true, "name": "topic", "options": [], - "query": { - "query": "label_values(kafka_log_log_size,topic)", - "refId": "Prometheus-topic-Variable-Query" - }, + "query": "label_values(kafka_log_log_size{env=\"$env\"}, topic)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", - "title": "Kafka Topics", - "uid": "vQT4b1-Mz", - "version": 1 -} \ No newline at end of file + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + 
"title": "Kafka topics - v2", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json index 37ed490b..0452da9c 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json @@ -1,36 +1,38 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of ksqlDB clusters.", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 10, - "iteration": 1632254575966, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 1, @@ -38,42 +40,48 @@ "x": 0, "y": 0 }, - "id": 29, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": 
"Average number of active queries per server.", + "description": "ksqlDB online instances returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -82,76 +90,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, - "id": 18, + "height": null, + "hideTimeOverride": false, + "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_num_active_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", - "instant": true, + "datasource": null, + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Active Queries", + "title": "ksqlDB: Online Servers", + "transformations": [], + 
"transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of created queries", + "description": "Number of active queries deployed in the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 - }, - { - "color": "#d44a3a", - "value": 800 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -160,75 +173,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, - "y": 1 + "y": 0 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_running_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Running 
Queries", + "title": "ksqlDB: Sum of Active Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of persisted queries", + "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 0 - }, - { - "color": "#299c46", - "value": 1 + "color": "green", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -237,75 +264,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 2, + "height": null, + "hideTimeOverride": false, + "id": 4, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_num_persistent_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\", 
ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Total Persisted Queries", + "title": "ksqlDB: Sum of Running Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of rebalancing queries", + "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal to zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stays higher than 0 for a longer period of time.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 5 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -314,330 +355,89 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 12, - "y": 1 - }, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr":
"sum(ksql_ksql_engine_query_stats_rebalancing_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Rebalancing Queries", - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Number of error query", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 2 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 16, - "y": 1 + "y": 0 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Queries 
in Error State", + "title": "ksqlDB: Sum of Rebalancing Queries", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Num of idle queries", + "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal to zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 1 - }, - "id": 19, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(ksql_ksql_engine_query_stats_num_idle_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Idle Queries", - "type": "stat" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "custom": { - "align": null, - "displayMode": "auto", - "filterable": false - }, + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", +
"yaxis": "left" }, { "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "ksql_query" - }, - "properties": [ - { - "id": "custom.width", - "value": 426 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "instance" - }, - "properties": [ - { - "id": "custom.width", - "value": 381 - } - ] - } - ] - }, - "gridPos": { - "h": 9, - "w": 16, - "x": 0, - "y": 5 - }, - "id": 23, - "options": { - "showHeader": true, - "sortBy": [] - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "ksql_ksql_metrics_ksql_queries_query_status{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"}", - "format": "table", - "instant": true, - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Queries Status", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "Value": true, - "__name__": true, - "env": true, - "job": true, - "ksql_cluster": true - }, - "indexByName": {}, - "renameByName": { - "Time": "", - "__name__": "", - "instance": "", - "ksql_cluster": "", - "ksql_query": "" - } - } - } - ], - "type": "table" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Num of not running queries", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" } ] }, @@ -646,125 +446,65 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 16, - "y": 5 - }, - "id": 5, - "interval": null, - "links": [], - "maxDataPoints": 100, - "options": { - "colorMode": 
"value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "text": {}, - "textMode": "auto" - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum(ksql_ksql_engine_query_stats_not_running_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", - "interval": "", - "legendFormat": "", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Stopped Queries", - "type": "stat" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": "Num of running queries", - "fieldConfig": { - "defaults": { - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "#299c46", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 1 - }, - { - "color": "#d44a3a", - "value": 5 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 4, - "w": 4, - "x": 20, - "y": 5 + "y": 0 }, - "id": 15, + "height": null, + "hideTimeOverride": false, + "id": 6, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(ksql_ksql_engine_query_stats_pending_shutdown_queries{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"})", + "datasource": null, + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\", 
ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Currently Shutting Down Queries", + "title": "ksqlDB: Sum of Queries Failed", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Cluster liveness", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -775,7 +515,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -786,78 +526,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "max": 1, - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 5, + "h": 10, "w": 8, - "x": 16, - "y": 9 + "x": 0, + "y": 1 }, - "id": 17, + "height": null, + "hideTimeOverride": false, + "id": 7, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ - "lastNotNull" + "max", + "mean", + "last" ], - "displayMode": "list", + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr":
"ksql_ksql_engine_query_stats_liveness_indicator{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"}", + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_liveness_indicator{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Cluster liveness", + "title": "Cluster Liveness", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Message consumed/sec", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -868,7 +615,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -879,51 +626,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 14 + "h": 10, + "w": 8, + "x": 8, + "y": 1 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" 
], "displayMode": "table", "placement": "bottom" @@ -932,23 +673,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"}", + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_consumed_per_sec{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Message consumed/sec", + "title": "Messages consumed/sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Message produced/sec", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -959,7 +715,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -970,51 +726,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "cps" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 12, - "x": 12, - "y": 14 + "h": 10, + "w": 8, + "x": 16, + "y": 1 }, - "id": 7, + "height": 
null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1023,41 +773,79 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(ksql_ksql_engine_query_stats_messages_produced_per_sec{job=\"ksqldb\", env=\"$env\", ksql_cluster=\"$ksqldb_cluster_id\", instance=~\"$instance\"}[5m])", + "datasource": null, + "expr": "ksql_ksql_engine_query_stats_messages_produced_per_sec{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Message produced/sec", + "title": "Messages produced/sec", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 22 + "y": 2 }, - "id": 33, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Rate of CPU seconds 
used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1068,7 +856,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1079,50 +867,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 23 + "y": 2 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1131,23 +914,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(process_cpu_seconds_total{job=\"ksqldb\", env=\"$env\", instance=~\"$instance\"}[5m])", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": 
null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Sum of JVM memory used, without including areas (e.g. heap size).", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1158,7 +956,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1169,50 +967,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "decbytes" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 23 + "y": 2 }, - "id": 24, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1221,29 +1014,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"ksqldb\", env=\"$env\",instance=~\"$instance\"})", - "interval": "", - "legendFormat": "Used:{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{job=\"ksqldb\",env=\"$env\",area=\"heap\"}", + "datasource": null, + "expr": "sum 
without(area)(jvm_memory_bytes_used{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "Max:{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Sum of seconds used by Garbage Collection.", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1254,7 +1056,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -1265,52 +1067,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 4, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 23 + "y": 2 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1319,38 +1114,68 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + 
"repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"ksqldb\", env=\"$env\", instance=~\"$instance\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Time spent in GC", + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 31 + "y": 3 }, - "id": 31, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1372,50 +1197,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, 
"overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 32 + "y": 3 }, - "id": 26, + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -1424,30 +1244,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" - }, - { - "refId": "C" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Poll Latency (Avg)", + "title": "Poll Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1466,75 +1294,88 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": 
"absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 6, - "y": 32 - }, - "id": 35, + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Poll Latency (Max)", + "title": "Poll Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1556,74 +1397,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, 
"thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 12, - "y": 32 + "h": 10, + "w": 8, + "x": 0, + "y": 4 }, - "id": 25, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Process Latency (Avg)", + "title": "Process Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1642,75 +1494,88 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 2, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - 
"showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 18, - "y": 32 - }, - "id": 34, + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_process_latency_max{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_process_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Process Latency Max", + "title": "Process Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1732,74 
+1597,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 43 + "y": 5 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit Latency (Avg)", + "title": "Commit Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": 
true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1821,74 +1697,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 6, - "y": 43 - }, - "id": 38, + "h": 10, + "w": 8, + "x": 8, + "y": 5 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Commit Latency (Max)", + "title": "Commit Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": 
null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1910,74 +1797,85 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 12, - "y": 43 + "h": 10, + "w": 8, + "x": 0, + "y": 6 }, - "id": 27, + "height": null, + "hideTimeOverride": false, + "id": 21, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", + "hide": false, "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_avg", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Punctuate Latency (Avg)", + "title": "Punctuate Latency 
(Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -1996,113 +1894,130 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, - "overrides": [ - { - "matcher": { - "id": "byRegexp", - "options": "/max/" - }, - "properties": [ - { - "id": "custom.lineStyle", - "value": { - "dash": [ - 5, - 2 - ], - "fill": "dash" - } - } - ] - } - ] + "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, - "x": 18, - "y": 43 - }, - "id": 37, + "h": 10, + "w": 8, + "x": 8, + "y": 6 + }, + "height": null, + "hideTimeOverride": false, + "id": 22, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { - "calcs": [], - "displayMode": "list", + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", "placement": "bottom" }, "tooltip": { "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": 
"kafka_streams_stream_thread_metrics_punctuate_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{thread_id}}_max", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{thread_id}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Punctuate Latency (Max)", + "title": "Punctuate Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "Queries Performance", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { - "defaults": {}, - "overrides": [] + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 7 }, - "id": 40, + "height": null, + "hideTimeOverride": false, + "id": 23, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [ { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2121,54 +2036,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { 
- "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 33 + "y": 7 }, - "id": 36, + "height": null, + "hideTimeOverride": false, + "id": 24, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2177,24 +2086,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Put Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2213,53 +2136,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], 
"thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 33 + "h": 10, + "w": 8, + "x": 8, + "y": 7 }, - "id": 42, + "height": null, + "hideTimeOverride": false, + "id": 25, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2268,24 +2186,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put average latency", + "title": "Put Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2304,53 +2236,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { 
"mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 33 + "h": 10, + "w": 8, + "x": 16, + "y": 7 }, - "id": 43, + "height": null, + "hideTimeOverride": false, + "id": 26, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2359,24 +2286,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_max{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put max latency", + "title": "Put Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2395,54 +2336,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 44 + "y": 8 }, - "id": 52, + "height": null, + "hideTimeOverride": false, + "id": 27, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2451,24 +2386,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put if absent rate", + "title": "Put if absent Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2487,53 +2436,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, 
- "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 44 - }, - "id": 53, + "h": 10, + "w": 8, + "x": 8, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 28, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2542,24 +2486,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put if absent average latency", + "title": "Put if absent Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2578,53 +2536,48 @@ "viz": false }, "lineInterpolation": "linear", - 
"lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 44 - }, - "id": 54, + "h": 10, + "w": 8, + "x": 16, + "y": 8 + }, + "height": null, + "hideTimeOverride": false, + "id": 29, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2633,24 +2586,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Put if absent max latency", + "title": "Put if absent Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { 
"defaults": { "color": { @@ -2669,54 +2636,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 55 + "y": 9 }, - "id": 41, + "height": null, + "hideTimeOverride": false, + "id": 30, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2725,24 +2686,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_fetch_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Fetch Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { - "datasource": null, - "description": "", + "cacheTimeout": null, + "datasource": 
"Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2761,53 +2736,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 55 - }, - "id": 44, + "h": 10, + "w": 8, + "x": 8, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2816,24 +2786,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch average latency", + "title": "Fetch Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" 
}, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2852,53 +2836,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 55 - }, - "id": 45, + "h": 10, + "w": 8, + "x": 16, + "y": 9 + }, + "height": null, + "hideTimeOverride": false, + "id": 32, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2907,24 +2886,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_put_latency_max{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Fetch max latency", + "title": "Fetch Latency (Max.)", 
"transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -2943,54 +2936,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 66 + "y": 10 }, - "id": 46, + "height": null, + "hideTimeOverride": false, + "id": 33, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -2999,24 +2986,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_delete_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" 
} ], "timeFrom": null, "timeShift": null, "title": "Delete Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3035,53 +3036,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 66 + "h": 10, + "w": 8, + "x": 8, + "y": 10 }, - "id": 47, + "height": null, + "hideTimeOverride": false, + "id": 34, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3090,24 +3086,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + 
"target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Delete average latency", + "title": "Delete Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3126,53 +3136,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 66 + "h": 10, + "w": 8, + "x": 16, + "y": 10 }, - "id": 48, + "height": null, + "hideTimeOverride": false, + "id": 35, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3181,24 +3186,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_delete_latency_max{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": 
"{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Delete max latency", + "title": "Delete Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3217,54 +3236,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "iops" + "steps": [] + }, + "unit": "ops" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 6, + "h": 10, + "w": 8, "x": 0, - "y": 77 + "y": 11 }, - "id": 49, + "height": null, + "hideTimeOverride": false, + "id": 36, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3273,24 +3286,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_rate{job=\"ksqldb\", env=\"$env\", thread_id=~\".+$ksqldb_cluster_id.+\", instance=~\"$instance\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + 
"format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, "title": "Restore Rate", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3309,53 +3336,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 6, - "y": 77 + "h": 10, + "w": 8, + "x": 8, + "y": 11 }, - "id": 50, + "height": null, + "hideTimeOverride": false, + "id": 37, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3364,24 +3386,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{job=\"ksqldb\", thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": 
"kafka_streams_stream_state_metrics_restore_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Restore average latency", + "title": "Restore Latency (Avg.)", "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -3400,53 +3436,48 @@ "viz": false }, "lineInterpolation": "linear", - "lineWidth": 3, + "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, "unit": "ms" }, "overrides": [] }, "gridPos": { - "h": 11, - "w": 9, - "x": 15, - "y": 77 + "h": 10, + "w": 8, + "x": 16, + "y": 11 }, - "id": 51, + "height": null, + "hideTimeOverride": false, + "id": 38, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -3455,130 +3486,174 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_streams_stream_state_metrics_restore_latency_max{job=\"ksqldb\", 
thread_id=~\".+$ksqldb_cluster_id.+\"}", + "datasource": null, + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "{{thread_id}}", - "refId": "B" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Restore max latency", + "title": "Restore Latency (Max.)", "transformations": [], + "transparent": false, "type": "timeseries" } ], - "title": "StateStore Metric", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "State Stores", + "transformations": [], + "transparent": false, "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "ksqldb" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,env)", - "refId": "Prometheus-env-Variable-Query" - }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": 
null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "All", - "value": "$__all" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "description": null, - "error": null, "hide": 0, - "includeAll": true, - "label": "Cluster ID", - "multi": true, - "name": "ksqldb_cluster_id", + "includeAll": false, + "label": "ksqlDB cluster", + "multi": false, + "name": "ksqldb_cluster", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,ksql_cluster)", - "refId": "Prometheus-ksqldb_cluster_id-Variable-Query" + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\"},ksqldb_cluster_id)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null }, - "refresh": 2, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "datasource": "Prometheus", + "hide": 2, + "includeAll": false, + "label": "ksqlDB cluster ID", + "multi": false, + "name": "ksqldb_cluster_id", + "options": [], + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\"},ksql_cluster)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false }, { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "All", - "value": "$__all" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,instance)", - "description": null, - "error": null, 
"hide": 0, "includeAll": true, - "label": "Instance", + "label": "ksqlDB server", "multi": true, - "name": "instance", + "name": "ksqldb_server", "options": [], - "query": { - "query": "label_values(ksql_ksql_engine_query_stats_liveness_indicator,instance)", - "refId": "Prometheus-instance-Variable-Query" - }, + "query": "label_values(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -3588,10 +3663,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", - "title": "ksqlDB cluster", - "uid": "pbx34foGk", - "version": 1 -} \ No newline at end of file + "timezone": "browser", + "title": "ksqlDB cluster - v2", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json index 50235352..caf43878 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json @@ -1,686 +1,543 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - 
"name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Schema Registry cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 4, - "iteration": 1632254298743, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 19, + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "panels": [], - "title": "Schemas", + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Schema Registry online instances returning metrics.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { "color": "red", - "value": null + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { "color": "green", - "value": 1 + "index": 2, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, + "height": null, + "hideTimeOverride": false, "id": 2, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + 
"minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "count(kafka_schema_registry_registered_count{job=\"schema-registry\",env=\"$env\"})", - "instant": true, + "datasource": null, + "expr": "count(kafka_schema_registry_registered_count{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schema Registry Instances", + "title": "SR: Online instances", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": "Average number of registered schemas across the cluster.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": 
"null", + "yaxis": "left" } ] }, - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 12, - "x": 4, - "y": 1 - }, - "id": 11, - "options": { - "legend": { - "calcs": [ - "lastNotNull" - ], - "displayMode": "table", - "placement": "right" - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ - { - "exemplar": true, - "expr": "sum by(schema_type) (kafka_schema_registry_schemas_created{job=\"schema-registry\",env=\"$env\"})", - "interval": "", - "legendFormat": "{{schema_type}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schema registered over time", - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 5, "w": 4, - "x": 16, - "y": 1 + "x": 4, + "y": 0 }, - "id": 8, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "7.3.4", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum by(schema_type)(kafka_schema_registry_schemas_created{job=\"schema-registry\",env=\"$env\"})", - "instant": true, + "datasource": null, + "expr": "avg(kafka_schema_registry_registered_count{env=\"$env\"})", + 
"format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{schema_type}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schemas created", - "type": "piechart" + "title": "SR: Registered Schemas (avg.)", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", + "description": "Average number of schemas created, by type.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - } - }, - "decimals": 0, - "mappings": [], - "unit": "short" - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 4, - "x": 20, - "y": 1 - }, - "id": 9, - "interval": null, - "links": [], - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "bottom", - "values": [ - "value" - ] - }, - "pieType": "pie", - "reduceOptions": { - "calcs": [ - "lastNotNull" - ], - "fields": "", - "values": false - }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "7.3.4", - "targets": [ - { - "exemplar": true, - "expr": "avg by(schema_type)(kafka_schema_registry_schemas_deleted{job=\"schema-registry\",env=\"$env\"})", - "instant": true, - "interval": "", - "legendFormat": "{{schema_type}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Schemas deleted", - "type": "piechart" - }, - { - "datasource": "Prometheus", - "fieldConfig": { - "defaults": { + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] - } + }, + "unit": "none" }, "overrides": [] }, 
"gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 8, + "y": 0 }, - "id": 25, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": { "calcs": [ - "mean" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "exemplar": true, - "expr": "sum(kafka_schema_registry_registered_count{job=\"schema-registry\",env=\"$env\"})", - "instant": true, + "datasource": null, + "expr": "avg(kafka_schema_registry_schemas_created{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Schemas registered", + "title": "SR: Created Schemas by Type (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { - "collapsed": false, - "datasource": null, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 9 - }, - "id": 15, - "panels": [], - "title": "System", - "type": "row" - }, - { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": 
"linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "percent" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 10 + "h": 5, + "w": 4, + "x": 12, + "y": 0 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "irate(process_cpu_seconds_total{job=\"schema-registry\",env=\"$env\"}[5m])*100", + "datasource": null, + "expr": "sum(kafka_schema_registry_schemas_deleted{env=\"$env\"}) by (schema_type)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{schema_type}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", - "type": "timeseries" + "title": "SR: Sum of Deleted Schemas by Type", + "transformations": [], + "transparent": false, + "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + 
"description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], - "min": 0, + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, - "unit": "bytes" + "unit": "none" }, "overrides": [] }, "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 10 + "h": 5, + "w": 4, + "x": 16, + "y": 0 }, - "id": 21, + "height": null, + "hideTimeOverride": false, + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"schema-registry\",env=\"$env\"})", - "interval": "", - "legendFormat": "Used:{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{job=\"schema-registry\",env=\"$env\",area=\"heap\"}", + 
"datasource": null, + "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "Max:{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", - "type": "timeseries" + "title": "SR: Sum of Active Connections", + "transformations": [], + "transparent": false, + "type": "stat" }, { - "datasource": "Prometheus", - "description": "", + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 3, - "links": [], - "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 10 - }, - "id": 23, - "options": { - "legend": { - "calcs": [ - "mean", - "lastNotNull", - "max" - ], - "displayMode": "table", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" + "steps": [] + } } }, - "pluginVersion": "8.1.3", - "targets": [ - { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"schema-registry\",env=\"$env\"}[5m]))", - "interval": 
"", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Time spent in GC", - "type": "timeseries" - }, - { - "datasource": null, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 1 }, - "id": 17, - "title": "Connections", + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -691,7 +548,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -702,14 +559,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -717,34 +572,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 0, - "y": 18 + "y": 1 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -753,22 +606,38 @@ "mode": "single" } }, - 
"pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jetty_metrics_connections_active{job=\"schema-registry\",env=\"$env\"}", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$sr_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Active Connections", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -779,7 +648,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -790,14 +659,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -805,34 +672,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 8, - "y": 18 + "y": 1 }, - "id": 6, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", 
"placement": "bottom" @@ -841,22 +706,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jersey_metrics_request_rate{job=\"schema-registry\",env=\"$env\"}", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$sr_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Requests Rate", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -867,7 +748,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -878,14 +759,12 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } @@ -893,34 +772,32 @@ "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] + "steps": [] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, + "h": 10, "w": 8, "x": 16, - "y": 18 + "y": 1 }, - "id": 24, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", 
"mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -929,66 +806,132 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "kafka_schema_registry_jersey_metrics_request_latency_99{job=\"schema-registry\",env=\"$env\"}", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$sr_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Requests latency 99p", + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "schema-registry" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null }, + "datasource": "Prometheus", + 
"hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "sr_server", + "options": [], + "query": "label_values(kafka_schema_registry_registered_count{env=\"$env\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, - "timepicker": {}, - "timezone": "", - "title": "Schema Registry cluster", - "uid": "9ixzve-Mk", - "version": 2 -} \ No newline at end of file + "timepicker": { + "hidden": false, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Schema Registry cluster - v2", + "uid": null, + "version": 0 +} diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json index 04646780..465129dc 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json @@ -1,76 +1,103 @@ { + "__inputs": [ + { + "description": "", + "label": "Prometheus", + "name": "DS_PROMETHEUS", + "pluginId": "prometheus", + "pluginName": "Prometheus", + "type": "datasource" + } + ], "annotations": { - "list": [ - { - "builtIn": 1, - "datasource": "-- Grafana --", - "enable": true, - "hide": true, - "iconColor": "rgba(0, 211, 255, 1)", - "name": "Annotations & Alerts", - "target": { - "limit": 100, - "matchAny": false, - "tags": [], - "type": "dashboard" - }, - "type": "dashboard" - } - ] + "list": [] }, + "description": "Overview of the Zookeeper 
cluster", "editable": true, "gnetId": null, - "graphTooltip": 0, - "id": 5, - "iteration": 1632253434096, + "hideControls": false, + "id": null, "links": [], "panels": [ { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 }, - "id": 22, - "title": "Health Check", + "height": null, + "hideTimeOverride": false, + "id": 1, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Overview", + "transformations": [], + "transparent": false, "type": "row" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Quorum Size of Zookeeper ensemble", + "description": "Quorum Size of Zookeeper ensemble.\n Count Zookeeper servers with quorum size metric.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "#d44a3a", - "value": null + "color": "red", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 2 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 2.0, + "yaxis": "left" }, { - "color": "#299c46", - "value": 3 + "color": "green", + "index": 2, + "line": true, + "op": "gt", + "value": 3.0, + "yaxis": "left" } ] }, @@ -79,78 +106,81 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 0, - "y": 1 + "y": 0 }, + "height": 
null, + "hideTimeOverride": false, "id": 2, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "count(zookeeper_status_quorumsize{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "count(zookeeper_status_quorumsize{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Zookeeper nodes online", + "title": "ZK: Quorum Size", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Alive Connections", + "description": "Average size of ZNodes in the cluster.\n Getting the node count per server, and averaging the node count.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null - }, - { - "color": "rgba(237, 129, 40, 0.89)", - "value": 100 - }, - { - "color": "#d44a3a", - "value": 200 + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -159,163 +189,180 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, "x": 4, 
- "y": 1 + "y": 0 }, - "id": 4, + "height": null, + "hideTimeOverride": false, + "id": 3, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": "value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_numaliveconnections{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "avg(zookeeper_inmemorydatatree_nodecount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Alive Connections", + "title": "ZK: ZNodes (avg.)", + "transformations": [], + "transparent": false, "type": "stat" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of queued requests in the server. 
This goes up when the server receives more requests than it can process", + "description": "Sum of the number of alive connections per servers divided by the maximum number of client connections allowed per host.\n If the percentage is higher than 60%, then Zookeeper should be scaled and/or the Zookeeper clients should be investigated to find the reason for high number of connections opened.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "line+area" - } - }, - "decimals": 0, - "links": [], + "custom": {}, + "decimals": null, "mappings": [], - "min": 0, + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "transparent", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" + }, + { + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 0.6, + "yaxis": "left" }, { "color": "red", - "value": 10 + "index": 2, + "line": true, + "op": "gt", + "value": 0.8, + "yaxis": "left" } ] }, - "unit": "short" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 8, - "w": 16, + "h": 5, + "w": 4, "x": 8, - "y": 1 + "y": 0 }, - "id": 7, + "height": null, + "hideTimeOverride": false, + "id": 4, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "legend": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + 
"reduceOptions": { "calcs": [ - "mean", - "lastNotNull", - "max" + "last" ], - "displayMode": "table", - "placement": "bottom" + "fields": "", + "values": false }, - "tooltip": { - "mode": "single" - } + "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "zookeeper_outstandingrequests{job=\"zookeeper\",env=\"$env\"}", + "datasource": null, + "expr": "zookeeper_numaliveconnections{env=\"$env\"} / zookeeper_maxclientcnxnsperhost{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{instance}})", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], - "thresholds": [], "timeFrom": null, "timeShift": null, - "title": "Outstanding Requests", - "type": "timeseries" + "title": "ZK: Connections used", + "transformations": [], + "transparent": false, + "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": "Sum of client watchers subscribed to changes on the ZNodes.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "thresholds" - }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" - } - ], + "custom": {}, + "decimals": null, + "mappings": [], + "noValue": "none", "thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null + "color": "blue", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" } ] }, @@ -324,139 +371,230 @@ "overrides": [] }, "gridPos": { - "h": 4, + "h": 5, "w": 4, - "x": 0, - "y": 5 + "x": 12, + "y": 0 }, - "id": 3, + "height": null, + "hideTimeOverride": false, + "id": 5, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "colorMode": 
"value", - "graphMode": "none", + "graphMode": "area", "justifyMode": "auto", - "orientation": "horizontal", + "orientation": "auto", "reduceOptions": { "calcs": [ - "lastNotNull" + "last" ], "fields": "", "values": false }, - "text": {}, "textMode": "auto" }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "avg(zookeeper_inmemorydatatree_nodecount{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "sum(zookeeper_inmemorydatatree_watchcount{env=\"$env\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", + "intervalFactor": 2, "legendFormat": "", - "refId": "A" + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of ZNodes", + "title": "ZK: Sum of watchers", + "transformations": [], + "transparent": false, "type": "stat" }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": "Number of Watchers", + "description": "Number of requests waiting for processing (queued).\n If the number of outstanding requests grows higher than 10, then the Zookeeper hosts should be checked.\n It could mean that there is not enough resources to cope with the number of requests.\n ", + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { - "mode": "thresholds" + "mode": "palette-classic" }, - "mappings": [ - { - "options": { - "match": "null", - "result": { - "text": "N/A" - } - }, - "type": "special" + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" } - ], + }, + 
"mappings": [], "thresholds": { "mode": "absolute", "steps": [ { - "color": "dark-green", - "value": null + "color": "green", + "index": 0, + "line": true, + "op": "gt", + "value": "null", + "yaxis": "left" }, { - "color": "rgba(237, 129, 40, 0.89)", - "value": 500 + "color": "yellow", + "index": 1, + "line": true, + "op": "gt", + "value": 1.0, + "yaxis": "left" }, { - "color": "#d44a3a", - "value": 1000 + "color": "red", + "index": 2, + "line": true, + "op": "gt", + "value": 10.0, + "yaxis": "left" } ] }, - "unit": "none" + "unit": "" }, "overrides": [] }, "gridPos": { - "h": 4, - "w": 4, - "x": 4, - "y": 5 + "h": 5, + "w": 8, + "x": 16, + "y": 0 }, - "id": 5, + "height": null, + "hideTimeOverride": false, + "id": 6, "interval": null, "links": [], "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { - "colorMode": "value", - "graphMode": "none", - "justifyMode": "auto", - "orientation": "horizontal", - "reduceOptions": { + "legend": { "calcs": [ - "lastNotNull" + "max", + "last" ], - "fields": "", - "values": false + "displayMode": "table", + "placement": "right" }, - "text": {}, - "textMode": "auto" + "tooltip": { + "mode": "single" + } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum(zookeeper_inmemorydatatree_watchcount{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "zookeeper_outstandingrequests{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}} ({{server_id}}:{{member_type}})", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Number of Watchers", - "type": "stat" + "title": "ZK: Outstanding Requests", + "transformations": [], + "transparent": false, + "type": "timeseries" }, { + "cacheTimeout": null, + "collapsed": false, "datasource": null, + 
"description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 9 + "y": 1 }, - "id": 20, + "height": null, + "hideTimeOverride": false, + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, "title": "System", + "transformations": [], + "transparent": false, "type": "row" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -467,7 +605,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -478,46 +616,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, - "unit": "percent" + "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 0, - "y": 10 + "y": 1 }, - "id": 12, + "height": null, + "hideTimeOverride": false, + "id": 8, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -526,23 +663,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 
null, "targets": [ { - "expr": "irate(process_cpu_seconds_total{job=\"zookeeper\",env=\"$env\"}[5m])*100", + "datasource": null, + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$zk_server\"}[5m])", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "CPU Usage", + "title": "CPU usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -553,7 +705,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -564,47 +716,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, "unit": "bytes" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 8, - "y": 10 + "y": 1 }, - "id": 13, + "height": null, + "hideTimeOverride": false, + "id": 9, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", - "lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -613,29 +763,38 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": 
null, "targets": [ { - "expr": "sum without(area)(jvm_memory_bytes_used{job=\"zookeeper\",env=\"$env\"})", + "datasource": null, + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$zk_server\"})", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "Used:{{instance}}", - "refId": "A" - }, - { - "expr": "jvm_memory_bytes_max{job=\"zookeeper\",env=\"$env\",area=\"heap\"}", - "interval": "", - "legendFormat": "Max:{{instance}}", - "refId": "B" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "JVM Memory Used", + "title": "Memory usage", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, "datasource": "Prometheus", - "description": "", + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { "color": { @@ -646,7 +805,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -657,48 +816,45 @@ "lineWidth": 1, "pointSize": 5, "scaleDistribution": { + "log": 2, "type": "linear" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, "thresholdsStyle": { "mode": "off" } }, - "decimals": 3, - "links": [], "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "steps": [] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { - "h": 7, + "h": 10, "w": 8, "x": 16, - "y": 10 + "y": 1 }, - "id": 14, + "height": null, + "hideTimeOverride": false, + "id": 10, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, "options": { "legend": { "calcs": [ + "max", "mean", -
"lastNotNull", - "max" + "last" ], "displayMode": "table", "placement": "bottom" @@ -707,329 +863,1028 @@ "mode": "single" } }, - "pluginVersion": "8.1.3", + "repeat": null, + "repeatDirection": null, + "span": null, "targets": [ { - "expr": "sum without(gc)(rate(jvm_gc_collection_seconds_sum{job=\"zookeeper\",env=\"$env\"}[5m]))", + "datasource": null, + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$zk_server\"}[5m]))", + "format": "time_series", + "hide": false, + "instant": false, "interval": "", - "legendFormat": "{{instance}}", - "refId": "A" + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } ], "timeFrom": null, "timeShift": null, - "title": "Time spent in GC", + "title": "GC collection", + "transformations": [], + "transparent": false, "type": "timeseries" }, { + "cacheTimeout": null, + "collapsed": true, "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, "gridPos": { "h": 1, "w": 24, "x": 0, - "y": 17 + "y": 2 }, - "id": 18, - "title": "Request Latency", - "type": "row" - }, - { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, +
"tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_minrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Minimum)", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": 
"palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 }, - "decimals": 0, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 0, - "y": 18 - }, - "id": 9, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_avgrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Average)", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ { - "expr": 
"zookeeper_minrequestlatency{job=\"zookeeper\",env=\"$env\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{instance}})", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "zookeeper_maxrequestlatency{env=\"$env\"} * zookeeper_ticktime", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "ZK: Request Latency (Maximum)", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": 
null, "timeShift": null, - "title": "Request Latency - Minimum", - "type": "timeseries" + "title": "Server Latency", + "transformations": [], + "transparent": false, + "type": "row" }, { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, "fieldConfig": { "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "decimals": 0, - "links": [], - "mappings": [], - "min": 0, "thresholds": { "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] - }, - "unit": "ms" - }, - "overrides": [] + "steps": [] + } + } }, "gridPos": { - "h": 7, - "w": 8, - "x": 8, - "y": 18 - }, - "id": 10, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" - }, - "tooltip": { - "mode": "single" - } + "h": 1, + "w": 24, + "x": 0, + "y": 3 }, - "pluginVersion": "8.1.3", - "targets": [ + "height": null, + "hideTimeOverride": false, + "id": 15, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { - "exemplar": true, - "expr": "zookeeper_avgrequestlatency{job=\"zookeeper\",env=\"$env\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{instance}})", - "refId": "A" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Request Latency - Average", 
- "type": "timeseries" - }, - { - "datasource": "Prometheus", - "description": "Amount of time it takes for the server to respond to a client request", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 16, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{env=\"$env\",quantile=~\"$quantile\"}", + "format": "time_series", + "hide": false, + 
"instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Request Latency", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" }, - "showPoints": "never", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 3 + }, + "height": null, + "hideTimeOverride": false, + "id": 17, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeepersyncconnectspersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + 
"refId": "", + "step": 10, + "target": "" } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Sync Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 3 }, + "height": null, + "hideTimeOverride": false, + "id": 18, + "interval": null, "links": [], - "mappings": [], - "min": 0, - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - } - ] + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } }, - "unit": "ms" + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperexpirespersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": 
"Kafka: Expired Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 8, - "x": 16, - "y": 18 - }, - "id": 11, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom" + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 19, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperdisconnectspersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Kafka: Disconnected Connections/sec", + 
"transformations": [], + "transparent": false, + "type": "timeseries" }, - "tooltip": { - "mode": "single" - } - }, - "pluginVersion": "8.1.3", - "targets": [ { - "expr": "zookeeper_maxrequestlatency{job=\"zookeeper\",env=\"$env\"} * zookeeper_ticktime", - "interval": "", - "legendFormat": "{{server_id}}:{{member_type}} ({{instance}})", - "refId": "A" + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 4 + }, + "height": null, + "hideTimeOverride": false, + "id": 20, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_server_sessionexpirelistener_zookeeperauthfailurespersec{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + 
"title": "Kafka: Auth Failures on Connections/sec", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], "timeFrom": null, "timeShift": null, - "title": "Request Latency - Maximum", - "type": "timeseries" + "title": "Client Latency (Kafka)", + "transformations": [], + "transparent": false, + "type": "row" } ], - "refresh": "1m", - "schemaVersion": 30, + "refresh": "30s", + "rows": [], + "schemaVersion": 12, + "sharedCrosshair": false, "style": "dark", - "tags": [], + "tags": [ + "confluent", + "kafka", + "zookeeper" + ], "templating": { "list": [ { "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", "current": { "selected": false, - "text": "dev", - "value": "dev" + "tags": [], + "text": null, + "value": null }, "datasource": "Prometheus", - "definition": "label_values(env)", - "description": null, - "error": null, "hide": 0, "includeAll": false, "label": "Environment", "multi": false, "name": "env", "options": [], - "query": { - "query": "label_values(env)", - "refId": "Prometheus-env-Variable-Query" + "query": "label_values(env)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Server", + "multi": true, + "name": "zk_server", + "options": [], + "query": "label_values(zookeeper_outstandingrequests{env=\"$env\"}, hostname)", "refresh": 1, - "regex": "", - "skipUrlSync": false, - "sort": 0, - "tagValuesQuery": "", - "tagsQuery": "", + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "allValue": null, + "auto": false, + "auto_count": 30, + 
"auto_min": "10s", + "current": { + "selected": false, + "tags": [], + "text": null, + "value": null + }, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Quantile", + "multi": false, + "name": "quantile", + "options": [], + "query": "label_values(kafka_server_zookeeperclientmetrics_zookeeperrequestlatencyms{env=\"$env\"}, quantile)", + "refresh": 1, + "regex": null, + "sort": 1, + "tagValuesQuery": null, + "tagsQuery": null, "type": "query", "useTags": false } ] }, "time": { - "from": "now-12h", + "from": "now-1h", "to": "now" }, "timepicker": { + "hidden": false, "refresh_intervals": [ + "5s", "10s", "30s", "1m", @@ -1039,10 +1894,21 @@ "1h", "2h", "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" ] }, - "timezone": "", - "title": "Zookeeper cluster", - "uid": "H4xS98vWk", - "version": 1 -} \ No newline at end of file + "timezone": "browser", + "title": "Zookeeper cluster - v2", + "uid": null, + "version": 0 +} From f689f86005bd3ac720f4512feba0135bd8c31dfb Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Tue, 12 Jul 2022 16:27:41 +0100 Subject: [PATCH 25/28] docs: add comments to sr, connect, and kafka --- .../grafana/kafka-connect-cluster.json | 502 ++++++------- .../grafana/ksqldb-cluster.json | 132 ++-- .../grafana/schema-registry-cluster.json | 30 +- grafana-dashboards/kafka-cluster.py | 108 ++- grafana-dashboards/kafka-connect-cluster.py | 677 ++++++++---------- grafana-dashboards/ksqldb-cluster.py | 429 +++++------ grafana-dashboards/schema-registry-cluster.py | 103 ++- .../dashboards/kafka-connect-cluster.json | 424 +++++------ .../dashboards/ksqldb-cluster.json | 56 +- .../dashboards/schema-registry-cluster.json | 10 +- 10 files changed, 1152 insertions(+), 1319 deletions(-) diff --git a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json index c230452e..92b49191 100644 --- 
a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json +++ b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json @@ -55,14 +55,14 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Overview", + "title": "Cluster Overview", "transformations": [], "transparent": false, "type": "row" }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Kafka Connect online workers returning metrics.\n ", "editable": true, "error": false, @@ -145,7 +145,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Number of tasks deployed on Kafka Connect cluster.\n ", "editable": true, "error": false, @@ -228,7 +228,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Number of Running Tasks on the Kafka Connect cluster.\n Ideally, this number should be equal to the total number of tasks deployed.\n ", "editable": true, "error": false, @@ -319,7 +319,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n ", "editable": true, "error": false, @@ -410,7 +410,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Number of Paused Tasks on the Kafka Connect cluster.\n Ideally, this number should be zero, as tasks should be running.\n It's recommended alerting when this value is higher than 0.\n ", "editable": true, "error": false, @@ -501,7 +501,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Informative value. 
Time since last rebalance.\n When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly.\n ", "editable": true, "error": false, @@ -588,8 +588,8 @@ "mode": "thresholds" }, "columns": [], - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Connect workers metadata and stats.\n ", "editable": true, "error": false, "fieldConfig": { @@ -808,8 +808,8 @@ "mode": "thresholds" }, "columns": [], - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Connectors deployed and task stats.\n ", "editable": true, "error": false, "fieldConfig": { @@ -962,8 +962,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "How much time the connector tasks are in running state.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1062,8 +1062,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average time spent on rebalance state.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1155,7 +1155,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Rebalance Latency", + "title": "Rebalance Latency (avg.)", "transformations": [], "transparent": false, "type": "timeseries" @@ -1203,7 +1203,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", "editable": true, "error": false, @@ -1303,7 +1303,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Sum of JVM memory used, without including areas (e.g. 
heap size).", "editable": true, "error": false, @@ -1403,7 +1403,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Sum of seconds used by Garbage Collection.", "editable": true, "error": false, @@ -1533,8 +1533,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Maximum and average size of the batches processed by the connector task.", "editable": true, "error": false, "fieldConfig": { @@ -1617,98 +1617,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Batch Size (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - 
"mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_connect_connector_task_metrics_batch_size_max{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", @@ -1717,7 +1631,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (max.)", "metric": "", "refId": "", "step": 10, @@ -1726,15 +1640,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Batch Size (Max.)", + "title": "Batch Size", "transformations": [], "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Percentage of offset commit successful and failed.", "editable": true, "error": false, "fieldConfig": { @@ -1781,11 +1695,11 @@ "h": 10, "w": 8, "x": 0, - "y": 7 + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 19, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, @@ -1817,7 +1731,21 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_failure_percentage{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (failure)", "metric": "", "refId": "", "step": 10, @@ -1826,15 +1754,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Offset commit success %", + "title": "Offset commit", "transformations": 
[], "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average and Maximum time in milliseconds taken by the task to commit offsets", "editable": true, "error": false, "fieldConfig": { @@ -1880,12 +1808,12 @@ "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 7 + "x": 16, + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 20, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, @@ -1917,7 +1845,21 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", "metric": "", "refId": "", "step": 10, @@ -1926,7 +1868,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Offset commit avg. 
latency", + "title": "Offset commit latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -1962,11 +1904,11 @@ "h": 1, "w": 24, "x": 0, - "y": 8 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 21, + "id": 20, "interval": null, "links": [], "maxDataPoints": 100, @@ -1975,8 +1917,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of failures seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2023,11 +1965,11 @@ "h": 10, "w": 8, "x": 0, - "y": 8 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 22, + "id": 21, "interval": null, "links": [], "maxDataPoints": 100, @@ -2075,8 +2017,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of errors seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2123,11 +2065,11 @@ "h": 10, "w": 8, "x": 8, - "y": 8 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 23, + "id": 22, "interval": null, "links": [], "maxDataPoints": 100, @@ -2175,8 +2117,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of records skipped seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2223,11 +2165,11 @@ "h": 10, "w": 8, "x": 16, - "y": 8 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 24, + "id": 23, "interval": null, "links": [], "maxDataPoints": 100, @@ -2275,8 +2217,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of records logged seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2323,11 +2265,11 @@ "h": 10, "w": 8, "x": 0, - "y": 9 + "y": 8 }, "height": null, "hideTimeOverride": false, - 
"id": 25, + "id": 24, "interval": null, "links": [], "maxDataPoints": 100, @@ -2375,8 +2317,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Total number of retries seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2423,11 +2365,11 @@ "h": 10, "w": 8, "x": 8, - "y": 9 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 26, + "id": 25, "interval": null, "links": [], "maxDataPoints": 100, @@ -2475,8 +2417,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of produce requests to dead letter topics.", "editable": true, "error": false, "fieldConfig": { @@ -2523,11 +2465,11 @@ "h": 10, "w": 8, "x": 16, - "y": 9 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 27, + "id": 26, "interval": null, "links": [], "maxDataPoints": 100, @@ -2604,11 +2546,11 @@ "h": 1, "w": 24, "x": 0, - "y": 10 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 28, + "id": 27, "interval": null, "links": [], "maxDataPoints": 100, @@ -2617,8 +2559,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average and Maximum time in milliseconds taken by this task to poll for a batch of source records", "editable": true, "error": false, "fieldConfig": { @@ -2665,11 +2607,11 @@ "h": 10, "w": 8, "x": 0, - "y": 10 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 29, + "id": 28, "interval": null, "links": [], "maxDataPoints": 100, @@ -2701,7 +2643,21 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": 
"kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", "metric": "", "refId": "", "step": 10, @@ -2710,15 +2666,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Avg. Latency", + "title": "Poll Batch Latency", "transformations": [], "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Before transformations are applied, \n this is the average per-second number of records produced or \n polled by the task belonging to the named source connector in the worker\n ", "editable": true, "error": false, "fieldConfig": { @@ -2757,7 +2713,7 @@ "mode": "absolute", "steps": [] }, - "unit": "ms" + "unit": "ops" }, "overrides": [] }, @@ -2765,11 +2721,11 @@ "h": 10, "w": 8, "x": 8, - "y": 10 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 30, + "id": 29, "interval": null, "links": [], "maxDataPoints": 100, @@ -2795,7 +2751,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2810,15 +2766,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Max. 
Latency", + "title": "Source Record Poll Rate", "transformations": [], "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "After transformations are applied, \n this is the average per-second number of records output from the transformations and \n written to Kafka for the task belonging to the named source connector in the worker \n (excludes any records filtered out by the transformations)\n ", "editable": true, "error": false, "fieldConfig": { @@ -2864,12 +2820,12 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 11 + "x": 16, + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 31, + "id": 30, "interval": null, "links": [], "maxDataPoints": 100, @@ -2895,7 +2851,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2910,14 +2866,56 @@ ], "timeFrom": null, "timeShift": null, - "title": "Source Record Poll Rate", + "title": "Source Record Write Rate", "transformations": [], "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Source Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "height": null, + 
"hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2957,15 +2955,15 @@ "mode": "absolute", "steps": [] }, - "unit": "ops" + "unit": "ms" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 11 + "x": 0, + "y": 10 }, "height": null, "hideTimeOverride": false, @@ -2995,13 +2993,27 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_source_record_write_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", "metric": "", "refId": "", "step": 10, @@ -3010,57 +3022,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Source Record Write Rate", + "title": "Put Batch Latency", "transformations": [], "transparent": false, "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Source Tasks", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - 
"cacheTimeout": null, - "collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Before transformations are applied, \n this is the average per-second number of records read from Kafka \n for the task belonging to the named sink connector in the worker\n ", "editable": true, "error": false, "fieldConfig": { @@ -3099,19 +3069,19 @@ "mode": "absolute", "steps": [] }, - "unit": "ms" + "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 12 + "x": 8, + "y": 10 }, "height": null, "hideTimeOverride": false, - "id": 34, + "id": 33, "interval": null, "links": [], "maxDataPoints": 100, @@ -3137,7 +3107,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_sink_record_read_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -3152,15 +3122,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Avg. 
Latency", + "title": "Sink Record Read Rate", "transformations": [], "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "After transformations are applied, \n this is the average per-second number of records output from the transformations and \n sent to the task belonging to the named sink connector in the worker \n (excludes any records filtered out by the transformations)\n ", "editable": true, "error": false, "fieldConfig": { @@ -3199,19 +3169,19 @@ "mode": "absolute", "steps": [] }, - "unit": "ms" + "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 12 + "x": 16, + "y": 10 }, "height": null, "hideTimeOverride": false, - "id": 35, + "id": 34, "interval": null, "links": [], "maxDataPoints": 100, @@ -3237,7 +3207,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_sink_record_send_rate{namespace=\"$env\",app=\"$connect_cluster\",pod=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -3252,15 +3222,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Max. 
Latency", + "title": "Sink Record Send Rate", "transformations": [], "transparent": false, "type": "timeseries" }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of topic partitions assigned to the task and \n which belong to the named sink connector in the worker\n ", "editable": true, "error": false, "fieldConfig": { @@ -3307,11 +3277,11 @@ "h": 10, "w": 8, "x": 0, - "y": 13 + "y": 11 }, "height": null, "hideTimeOverride": false, - "id": 36, + "id": 35, "interval": null, "links": [], "maxDataPoints": 100, @@ -3392,7 +3362,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 37, + "id": 36, "interval": null, "links": [], "maxDataPoints": 100, @@ -3401,8 +3371,8 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Incoming byte rate per second per worker.", "editable": true, "error": false, "fieldConfig": { @@ -3453,7 +3423,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 38, + "id": 37, "interval": null, "links": [], "maxDataPoints": 100, @@ -3501,8 +3471,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Outgoing byte rate per second per worker.", "editable": true, "error": false, "fieldConfig": { @@ -3553,7 +3523,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 39, + "id": 38, "interval": null, "links": [], "maxDataPoints": 100, @@ -3601,8 +3571,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Fraction of time the I/O thread spent doing I/O", "editable": true, "error": false, "fieldConfig": { @@ -3653,7 +3623,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 40, + "id": 39, "interval": null, "links": [], "maxDataPoints": 100, @@ -3701,8 +3671,8 @@ }, { "cacheTimeout": 
null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Average number of network operations (reads or writes) on all connections per second", "editable": true, "error": false, "fieldConfig": { @@ -3753,7 +3723,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 41, + "id": 40, "interval": null, "links": [], "maxDataPoints": 100, @@ -3801,8 +3771,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of active connections", "editable": true, "error": false, "fieldConfig": { @@ -3853,7 +3823,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 42, + "id": 41, "interval": null, "links": [], "maxDataPoints": 100, @@ -3901,8 +3871,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Successful and failed authentications per second.", "editable": true, "error": false, "fieldConfig": { @@ -3953,7 +3923,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 43, + "id": 42, "interval": null, "links": [], "maxDataPoints": 100, @@ -4008,7 +3978,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Authentications", + "title": "Rate of Authentication", "transformations": [], "transparent": false, "type": "timeseries" @@ -4048,7 +4018,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Environment", @@ -4075,7 +4045,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Connect cluster", @@ -4102,7 +4072,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Connect worker", @@ -4129,7 +4099,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + 
"datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Connector", diff --git a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json index 054efed1..0e2eb79e 100644 --- a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json +++ b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "ksqlDB online instances returning metrics.\n ", "editable": true, "error": false, @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -145,7 +145,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Number of active queries deployed in the cluster.\n ", "editable": true, "error": false, @@ -206,7 +206,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -228,7 +228,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Number of running queries deployed in the cluster.\n Ideally, this number should be equal to the number of active queries as queries should be running.\n ", "editable": true, "error": false, @@ -297,7 +297,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": 
"sum(ksql_ksql_engine_query_stats_running_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -319,7 +319,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Number of queries rebalancing in the cluster.\n Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute).\n It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time.\n ", "editable": true, "error": false, @@ -388,7 +388,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -410,7 +410,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Number of queries failed in the cluster.\n Ideally, this number should be equal zero.\n It's recommended to alert if the number of queries failed is higher than 0.\n ", "editable": true, "error": false, @@ -479,7 +479,7 @@ "targets": [ { "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\", app=\"$ksqldb_cluster\"})", + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{namespace=\"$env\",app=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -501,7 +501,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -601,7 +601,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -701,7 +701,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + 
"datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -842,7 +842,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Rate of CPU seconds used by the Java process.\n 100% usage represents one core. \n If there are multiple cores, the total capacity should be 100% * number_cores.", "editable": true, "error": false, @@ -942,7 +942,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Sum of JVM memory used, without including areas (e.g. heap size).", "editable": true, "error": false, @@ -1042,7 +1042,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Sum of seconds used by Garbage Collection.", "editable": true, "error": false, @@ -1172,7 +1172,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1250,7 +1250,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1272,7 +1272,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1350,7 +1350,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1372,7 +1372,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1450,7 +1450,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1472,7 +1472,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1550,7 +1550,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1572,7 +1572,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1650,7 +1650,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_thread_metrics_commit_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1672,7 +1672,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1750,7 +1750,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1772,7 +1772,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1850,7 +1850,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1872,7 +1872,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1950,7 +1950,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2014,7 +2014,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2092,7 +2092,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2114,7 +2114,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2192,7 +2192,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2214,7 +2214,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2292,7 +2292,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_state_metrics_put_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2314,7 +2314,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2392,7 +2392,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2414,7 +2414,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2492,7 +2492,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2514,7 +2514,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2592,7 +2592,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2614,7 +2614,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2692,7 +2692,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_fetch_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2714,7 +2714,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2792,7 +2792,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2814,7 +2814,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2892,7 +2892,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_state_metrics_fetch_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2914,7 +2914,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2992,7 +2992,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_delete_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3014,7 +3014,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3092,7 +3092,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3114,7 +3114,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3192,7 +3192,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_state_metrics_delete_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3214,7 +3214,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3292,7 +3292,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_restore_rate{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3314,7 +3314,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3392,7 +3392,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3414,7 +3414,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3492,7 +3492,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": 
"kafka_streams_stream_state_metrics_restore_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3547,7 +3547,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Environment", @@ -3574,7 +3574,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "ksqlDB cluster", @@ -3601,7 +3601,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 2, "includeAll": false, "label": "ksqlDB cluster ID", @@ -3628,7 +3628,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "ksqlDB server", diff --git a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json index 30f4c28e..1027687a 100644 --- a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json +++ b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Schema Registry online instances returning metrics.\n ", "editable": true, "error": false, @@ -161,7 +161,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Average number of registered schemas across the cluster.\n ", "editable": true, "error": false, @@ -244,7 +244,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": "Average number of schemas created, by type.\n ", "editable": true, "error": false, @@ -327,7 +327,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", 
"description": null, "editable": true, "error": false, @@ -410,8 +410,8 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, + "datasource": "${DS_PROMETHEUS}", + "description": "Number of active connections", "editable": true, "error": false, "fieldConfig": { @@ -471,7 +471,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", + "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count{namespace=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -534,7 +534,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -612,7 +612,7 @@ "targets": [ { "datasource": null, - "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=~\"$sr_server\"}[5m])", + "expr": "irate(process_cpu_seconds_total{namespace=\"$env\",pod=\"$sr_server\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -634,7 +634,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -712,7 +712,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=~\"$sr_server\"})", + "expr": "sum without(area)(jvm_memory_bytes_used{namespace=\"$env\",pod=\"$sr_server\"})", "format": "time_series", "hide": false, "instant": false, @@ -734,7 +734,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -812,7 +812,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=~\"$sr_server\"}[5m]))", + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{namespace=\"$env\",pod=\"$sr_server\"}[5m]))", "format": "time_series", 
"hide": false, "instant": false, @@ -855,7 +855,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Environment", @@ -882,7 +882,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Server", diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index 0ff4857d..f05ee74e 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -40,7 +40,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): # Queries by_env = env_label + '="$env"' - by_env_and_server = env_label + '="$env",' + server_label + '=~"$broker"' + by_server = by_env + "," + server_label + '=~"$broker"' # Templating (variables) templating = G.Templating( @@ -168,7 +168,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(rate(kafka_network_requestmetrics_requestspersec{" - + by_env_and_server + + by_server + "}[5m]))", ), ], @@ -187,7 +187,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_log_log_size{" - + by_env_and_server + + by_server + "}) by (" + server_label + ")", @@ -211,7 +211,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_replicamanager_partitioncount{" - + by_env_and_server + + by_server + "})", ), ], @@ -230,7 +230,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_replicamanager_underreplicatedpartitions{" - + by_env_and_server + + by_server + "})", ), ], @@ -250,9 +250,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): dataSource=ds, targets=[ G.Target( - expr="sum(kafka_cluster_partition_underminisr{" - + by_env_and_server - + 
"})", + expr="sum(kafka_cluster_partition_underminisr{" + by_server + "})", ), ], reduceCalc="last", @@ -272,7 +270,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_controller_kafkacontroller_offlinepartitionscount{" - + by_env_and_server + + by_server + "})", ), ], @@ -292,7 +290,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(rate(kafka_server_brokertopicmetrics_bytesinpersec{" - + by_env_and_server + + by_server + "}[5m]))", ), ], @@ -312,7 +310,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(rate(kafka_server_brokertopicmetrics_bytesoutpersec{" - + by_env_and_server + + by_server + "}[5m]))", ), ], @@ -341,9 +339,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): dataSource=ds, targets=[ G.Target( - expr="irate(process_cpu_seconds_total{" - + by_env_and_server - + "}[5m])", + expr="irate(process_cpu_seconds_total{" + by_server + "}[5m])", legendFormat="{{" + server_label + "}}", ), ], @@ -360,9 +356,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): dataSource=ds, targets=[ G.Target( - expr="sum without(area)(jvm_memory_bytes_used{" - + by_env_and_server - + "})", + expr="sum without(area)(jvm_memory_bytes_used{" + by_server + "})", legendFormat="{{" + server_label + "}}", ), ], @@ -380,7 +374,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" - + by_env_and_server + + by_server + "}[5m]))", legendFormat="{{" + server_label + "}}", ), @@ -404,7 +398,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_messagesinpersec{" - + by_env_and_server + + by_server + "}[5m]))", legendFormat="{{" + server_label + 
"}}", ), @@ -423,7 +417,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesinpersec{" - + by_env_and_server + + by_server + "}[5m]))", legendFormat="{{" + server_label + "}}", ), @@ -442,7 +436,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum without(topic) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{" - + by_env_and_server + + by_server + "}[5m]))", legendFormat="{{" + server_label + "}}", ), @@ -476,7 +470,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="1-kafka_network_socketserver_networkprocessoravgidlepercent{" - + by_env_and_server + + by_server + "}", legendFormat="{{" + server_label + "}}", ), @@ -497,7 +491,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="1-kafka_server_kafkarequesthandlerpool_requesthandleravgidlepercent_total{" - + by_env_and_server + + by_server + "}", legendFormat="{{" + server_label + "}}", ), @@ -537,7 +531,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): expr="sum without(" + known_labels + ")(rate(kafka_network_requestmetrics_requestspersec{" - + by_env_and_server + + by_server + "}[5m]))", legendFormat="{{request}}(v{{version}})", ), @@ -561,7 +555,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): expr="sum without(" + known_labels + ")(rate(kafka_network_requestmetrics_errorspersec{" - + by_env_and_server + + by_server + ',error!="NONE"}[5m]))', legendFormat="{{error}}@{{request}}", ), @@ -594,7 +588,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_count{" - + by_env_and_server + + by_server + "}) by (" + server_label + ")", @@ -614,7 +608,7 @@ def 
dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_creation_rate{" - + by_env_and_server + + by_server + "}) by (" + server_label + ")", @@ -634,7 +628,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_close_rate{" - + by_env_and_server + + by_server + "}) by (" + server_label + ")", @@ -655,7 +649,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_count{" - + by_env_and_server + + by_server + "}) by (listener)", legendFormat="{{listener}}", ), @@ -673,7 +667,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_creation_rate{" - + by_env_and_server + + by_server + "}) by (listener)", legendFormat="{{listener}}", ), @@ -691,7 +685,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connection_close_rate{" - + by_env_and_server + + by_server + "}) by (listener)", legendFormat="{{listener}}", ), @@ -724,7 +718,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="rate(kafka_server_replicamanager_isrshrinkspersec{" - + by_env_and_server + + by_server + "}[5m])", legendFormat="{{" + server_label + "}}", ), @@ -744,7 +738,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="rate(kafka_server_replicamanager_isrexpandspersec{" - + by_env_and_server + + by_server + "}[5m])", legendFormat="{{" + server_label + "}}", ), @@ -778,7 +772,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_requestqueuetimems{" - 
+ by_env_and_server + + by_server + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -799,7 +793,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_localtimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -820,7 +814,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_remotetimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -841,7 +835,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_responsequeuetimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -862,7 +856,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_responsesendtimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Produce"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -897,7 +891,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_requestqueuetimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -918,7 +912,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_localtimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} 
({{quantile}}th)", ), @@ -939,7 +933,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_remotetimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -960,7 +954,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_responsequeuetimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -981,7 +975,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_responsesendtimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="Fetch"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -1016,7 +1010,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_requestqueuetimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -1037,7 +1031,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_localtimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -1058,7 +1052,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_remotetimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -1079,7 +1073,7 @@ def dashboard(ds="Prometheus", env_label="namespace", 
server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_responsequeuetimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -1100,7 +1094,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_network_requestmetrics_responsesendtimems{" - + by_env_and_server + + by_server + ',quantile=~"$quantile",request="FetchFollower"}', legendFormat="{{" + server_label + "}} ({{quantile}}th)", ), @@ -1132,7 +1126,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="kafka_coordinator_group_groupmetadatamanager_numgroups{" - + by_env_and_server + + by_server + "}", legendFormat="{{" + server_label + "}}", ), @@ -1150,31 +1144,31 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsstable{" - + by_env_and_server + + by_server + "})", legendFormat="stable", ), G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupspreparingrebalance{" - + by_env_and_server + + by_server + "})", legendFormat="preparing_rebalance", ), G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsdead{" - + by_env_and_server + + by_server + "})", legendFormat="dead", ), G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupscompletingrebalance{" - + by_env_and_server + + by_server + "})", legendFormat="completing_rebalance", ), G.Target( expr="sum(kafka_coordinator_group_groupmetadatamanager_numgroupsempty{" - + by_env_and_server + + by_server + "})", legendFormat="empty", ), @@ -1208,7 +1202,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_brokertopicmetrics_producemessageconversionspersec{" - + by_env_and_server + + by_server + "})", 
legendFormat="{{" + server_label + "}}", ), @@ -1229,7 +1223,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_brokertopicmetrics_fetchmessageconversionspersec{" - + by_env_and_server + + by_server + "})", legendFormat="{{" + server_label + "}}", ), @@ -1249,7 +1243,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): targets=[ G.Target( expr="sum(kafka_server_socketservermetrics_connections{" - + by_env_and_server + + by_server + "}) by (client_software_name,client_software_version)", legendFormat="{{client_software_name}} (v{{client_software_version}})", ), diff --git a/grafana-dashboards/kafka-connect-cluster.py b/grafana-dashboards/kafka-connect-cluster.py index 472d8796..4378469e 100644 --- a/grafana-dashboards/kafka-connect-cluster.py +++ b/grafana-dashboards/kafka-connect-cluster.py @@ -3,41 +3,73 @@ def dashboard( + ds="Prometheus", env_label="namespace", server_label="' + server_label + '", connect_cluster_label="app", ): + """ + Kafka Connect cluster dashboard + It includes: + - Cluster overview + - System resources + - Connect workers + - Tasks + - Task Errors + - Source Tasks + - Sink Tasks + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. 
+ + Invariants: + - Max width: 24 + """ + + # Default sizes default_height = 5 stat_width = 4 ts_width = 8 + # Queries + by_env = env_label + '="$env"' + by_cluster = by_env + "," + connect_cluster_label + '="$connect_cluster"' + by_server = by_cluster + "," + server_label + '=~"$connect_worker"' + by_connector = by_server + ',connector=~"$connector"' + + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="connect_cluster", label="Connect cluster", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" - + env_label - + '="$env"}, ' + + by_env + + "}, " + connect_cluster_label + ")", ), G.Template( name="connect_worker", label="Connect worker", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"}, ' + + by_cluster + + "}, " + server_label + ")", multi=True, @@ -46,36 +78,36 @@ def dashboard( G.Template( name="connector", label="Connector", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_connect_connect_worker_metrics_connector_total_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"}, connector)', + + by_cluster + + "}, connector)", multi=True, includeAll=True, ), ] ) - hc_base = 0 - hc_panels = [ + # Panel groups + ## Cluster overview: + ### When updating descriptions on these panels, also update descriptions in confluent-platform.py + overview_base = 0 + overview_panels = [ G.RowPanel( - title="Overview", - gridPos=G.GridPos(h=1, w=24, x=0, y=hc_base), + title="Cluster Overview", + gridPos=G.GridPos(h=1, w=24, x=0, y=overview_base), ), + # First level G.Stat( title="Connect: Online Workers", description="""Kafka 
Connect online workers returning metrics. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="count(kafka_connect_app_info{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",version!=""})', + + by_cluster + + ',version!=""})', ), ], reduceCalc="last", @@ -83,21 +115,19 @@ def dashboard( G.Threshold(index=0, value=0.0, color="blue"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 0, y=hc_base + h=default_height, w=stat_width, x=stat_width * 0, y=overview_base ), ), G.Stat( title="Connect: Sum of Total Tasks", description="""Number of tasks deployed on Kafka Connect cluster. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_total_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -105,7 +135,7 @@ def dashboard( G.Threshold(index=0, value=0.0, color="blue"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 1, y=hc_base + h=default_height, w=stat_width, x=stat_width * 1, y=overview_base ), ), G.Stat( @@ -113,14 +143,12 @@ def dashboard( description="""Number of Running Tasks on the Kafka Connect cluster. Ideally, this number should be equal to the total number of tasks deployed. 
""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_running_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -129,7 +157,7 @@ def dashboard( G.Threshold(index=1, value=1.0, color="green"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 2, y=hc_base + h=default_height, w=stat_width, x=stat_width * 2, y=overview_base ), ), G.Stat( @@ -137,14 +165,12 @@ def dashboard( description="""Number of Paused Tasks on the Kafka Connect cluster. Ideally, this number should be zero, as tasks should be running. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_paused_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -153,7 +179,7 @@ def dashboard( G.Threshold(index=1, value=1.0, color="yellow"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 3, y=hc_base + h=default_height, w=stat_width, x=stat_width * 3, y=overview_base ), ), G.Stat( @@ -162,14 +188,12 @@ def dashboard( Ideally, this number should be zero, as tasks should be running. It's recommended alerting when this value is higher than 0. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_connect_connect_worker_metrics_connector_failed_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -178,7 +202,7 @@ def dashboard( G.Threshold(index=1, value=1.0, color="red"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 4, y=hc_base + h=default_height, w=stat_width, x=stat_width * 4, y=overview_base ), ), G.Stat( @@ -186,14 +210,12 @@ def dashboard( description="""Informative value. 
Time since last rebalance. When this value is continuously and repeatedly low means some connectors are failing and rebalancing is triggered constantly. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connect_worker_rebalance_metrics_time_since_last_rebalance_ms{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"} >= 0', + + by_cluster + + "} >= 0", legendFormat="{{" + server_label + "}}", ), ], @@ -204,28 +226,23 @@ def dashboard( G.Threshold(index=0, value=0.0, color="blue"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 5, y=hc_base + h=default_height, w=stat_width, x=stat_width * 5, y=overview_base ), ), + # Second level G.Table( title="Connect Workers", - dataSource="Prometheus", + description="""Connect workers metadata and stats. + """, + dataSource=ds, targets=[ G.Target( - expr="kafka_connect_app_info{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",start_time_ms!=""}', + expr="kafka_connect_app_info{" + by_cluster + ',start_time_ms!=""}', format="table", instant=True, ), G.Target( - expr="kafka_connect_app_info{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",version!=""}', + expr="kafka_connect_app_info{" + by_cluster + ',version!=""}', format="table", instant=True, ), @@ -233,10 +250,8 @@ def dashboard( expr="sum by (" + server_label + ") (kafka_connect_connect_worker_metrics_connector_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), @@ -244,10 +259,8 @@ def dashboard( expr="sum by (" + server_label + ") (kafka_connect_connect_worker_metrics_connector_startup_success_total{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), @@ -255,10 +268,8 @@ def dashboard( expr="sum by (" + server_label + ") 
(kafka_connect_connect_worker_metrics_connector_startup_failure_total{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), @@ -266,10 +277,8 @@ def dashboard( expr="sum by (" + server_label + ") (kafka_connect_connect_worker_metrics_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), @@ -277,10 +286,8 @@ def dashboard( expr="sum by (" + server_label + ") (kafka_connect_connect_worker_metrics_task_startup_success_total{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), @@ -288,10 +295,8 @@ def dashboard( expr="sum by (" + server_label + ") (kafka_connect_connect_worker_metrics_task_startup_failure_total{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), @@ -345,54 +350,45 @@ def dashboard( }, }, ], - gridPos=G.GridPos(h=default_height, w=24, x=0, y=hc_base + 1), + gridPos=G.GridPos(h=default_height, w=24, x=0, y=overview_base + 1), ), + # Third level G.Table( title="Connectors", - dataSource="Prometheus", + description="""Connectors deployed and task stats. 
+ """, + dataSource=ds, targets=[ G.Target( - expr="kafka_connect_connector_info{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"}', + expr="kafka_connect_connector_info{" + by_cluster + "}", format="table", instant=True, ), G.Target( expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_total_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), G.Target( expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_running_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), G.Target( expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_failed_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), G.Target( expr="sum by (connector) (kafka_connect_connect_worker_metrics_connector_paused_task_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"})', + + by_cluster + + "})", format="table", instant=True, ), @@ -426,49 +422,50 @@ def dashboard( }, }, ], - gridPos=G.GridPos(h=default_height, w=24, x=0, y=hc_base + 2), + gridPos=G.GridPos(h=default_height, w=24, x=0, y=overview_base + 2), ), + # Fourth level G.TimeSeries( title="Tasks Running Ratio", - dataSource="Prometheus", + description="""How much time the connector tasks are in running state.
+ """, + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connector_task_metrics_running_ratio{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"}', + + by_cluster + + "}", legendFormat="{{connector}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percentunit", - gridPos=G.GridPos(h=default_height * 2, w=12, x=0, y=hc_base + 3), + gridPos=G.GridPos(h=default_height * 2, w=12, x=0, y=overview_base + 3), ), G.TimeSeries( - title="Rebalance Latency", - dataSource="Prometheus", + title="Rebalance Latency (avg.)", + description="""Average time spent on rebalance state. + """, + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connect_worker_rebalance_metrics_rebalance_avg_time_ms{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster"}', + + by_cluster + + "}", legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", - gridPos=G.GridPos(h=default_height * 2, w=12, x=12, y=hc_base + 3), + gridPos=G.GridPos(h=default_height * 2, w=12, x=12, y=overview_base + 3), ), ] ## System resources: ### When updating descriptions on these panels, also update descriptions in other cluster dashboards - system_base = hc_base + 4 + system_base = overview_base + 4 system_panels = [ G.RowPanel( title="System", @@ -479,16 +476,10 @@ def dashboard( description="""Rate of CPU seconds used by the Java process. 100% usage represents one core.
If there are multiple cores, the total capacity should be 100% * number_cores.""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( - expr="irate(process_cpu_seconds_total{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}[5m])', + expr="irate(process_cpu_seconds_total{" + by_server + "}[5m])", legendFormat="{{" + server_label + "}}", ), ], @@ -502,16 +493,10 @@ def dashboard( G.TimeSeries( title="Memory usage", description="""Sum of JVM memory used, without including areas (e.g. heap size).""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( - expr="sum without(area)(jvm_memory_bytes_used{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"})', + expr="sum without(area)(jvm_memory_bytes_used{" + by_server + "})", legendFormat="{{" + server_label + "}}", ), ], @@ -525,16 +510,12 @@ def dashboard( G.TimeSeries( title="GC collection", description="""Sum of seconds used by Garbage Collection.""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}[5m]))', + + by_server + + "}[5m]))", legendFormat="{{" + server_label + "}}", ), ], @@ -547,20 +528,18 @@ def dashboard( ), ] + ## Workers: worker_base = system_base + 1 worker_inner = [ G.TimeSeries( title="Incoming Byte Rate", - dataSource="Prometheus", + description="Incoming byte rate per second per worker.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connect_metrics_incoming_byte_rate{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}', + + by_server + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -573,16 +552,13 @@ def dashboard( ), G.TimeSeries( 
title="Outgoing Byte Rate", - dataSource="Prometheus", + description="Outgoing byte rate per second per worker.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connect_metrics_outgoing_byte_rate{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}', + + by_server + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -595,16 +571,11 @@ def dashboard( ), G.TimeSeries( title="IO Ratio", - dataSource="Prometheus", + description="Fraction of time the I/O thread spent doing I/O", + dataSource=ds, targets=[ G.Target( - expr="kafka_connect_connect_metrics_io_ratio{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}', + expr="kafka_connect_connect_metrics_io_ratio{" + by_server + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -617,16 +588,13 @@ def dashboard( ), G.TimeSeries( title="Network IO Rate", - dataSource="Prometheus", + description="Average number of network operations (reads or writes) on all connections per second", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connect_metrics_network_io_rate{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}', + + by_server + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -639,16 +607,13 @@ def dashboard( ), G.TimeSeries( title="Active Connections", - dataSource="Prometheus", + description="Number of active connections", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connect_metrics_connection_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}', + + by_server + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -659,27 +624,20 @@ def dashboard( ), ), G.TimeSeries( - title="Authentications", - dataSource="Prometheus", + title="Rate of Authentication", + 
description="Successful and failed authentications per second.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connect_metrics_successful_authentication_rate{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}', + + by_server + + "}", legendFormat="{{" + server_label + "}} (success)", ), G.Target( expr="kafka_connect_connect_metrics_failed_authentication_total{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker"}', + + by_server + + "}", legendFormat="{{" + server_label + "}} (failed)", ), ], @@ -699,94 +657,82 @@ def dashboard( ), ] + ## Tasks: tasks_base = worker_base + 1 tasks_inner = [ G.TimeSeries( - title="Batch Size (Avg.)", - dataSource="Prometheus", + title="Batch Size", + description="Maximum and average size of the batches processed by the connector task.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connector_task_metrics_batch_size_avg{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (avg.)", ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="bytes", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=tasks_base - ), - ), - G.TimeSeries( - title="Batch Size (Max.)", - dataSource="Prometheus", - targets=[ G.Target( expr="kafka_connect_connector_task_metrics_batch_size_max{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (max.)", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="bytes", 
gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=tasks_base + h=default_height * 2, w=ts_width, x=ts_width * 0, y=tasks_base ), ), G.TimeSeries( - title="Offset commit success %", - dataSource="Prometheus", + title="Offset commit", + description="Percentage of offset commit successful and failed.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connector_task_metrics_offset_commit_success_percentage{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (success)", + ), + G.Target( + expr="kafka_connect_connector_task_metrics_offset_commit_failure_percentage{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (failure)", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="percentunit", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=tasks_base + 1 + h=default_height * 2, w=ts_width, x=ts_width * 0, y=tasks_base ), ), G.TimeSeries( - title="Offset commit avg. 
latency", - dataSource="Prometheus", + title="Offset commit latency", + description="Average and Maximum time in milliseconds taken by the task to commit offsets", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_connector_task_metrics_offset_commit_avg_time_ms{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (avg.)", + ), + G.Target( + expr="kafka_connect_connector_task_metrics_offset_commit_max_time_ms{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (max.)", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=tasks_base + 1 + h=default_height * 2, w=ts_width, x=ts_width * 2, y=tasks_base ), ), ] @@ -799,20 +745,19 @@ def dashboard( ), ] - task_errors_base = tasks_base + 2 + ## Task Errors: + task_errors_base = tasks_base + 1 task_errors_inner = [ + # First layer G.TimeSeries( title="Total Record Failures", - dataSource="Prometheus", + description="Total number of failures seen by task.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_record_failures{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + + "}", legendFormat="{{connector}}[{{task}}]", ), ], @@ -824,16 +769,13 @@ def dashboard( ), G.TimeSeries( title="Total Record Error", - dataSource="Prometheus", + description="Total number of errors seen by task.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_record_errors{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + 
+ "}", legendFormat="{{connector}}[{{task}}]", ), ], @@ -845,16 +787,13 @@ def dashboard( ), G.TimeSeries( title="Total Records Skipped", - dataSource="Prometheus", + description="Total number of records skipped seen by task.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_records_skipped{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + + "}", legendFormat="{{connector}}[{{task}}]", ), ], @@ -864,18 +803,16 @@ def dashboard( h=default_height * 2, w=ts_width, x=ts_width * 2, y=task_errors_base ), ), + # Second layer G.TimeSeries( title="Total Errors Logged", - dataSource="Prometheus", + description="Total number of records logged seen by task.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_errors_logged{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + + "}", legendFormat="{{connector}}[{{task}}]", ), ], @@ -887,16 +824,13 @@ def dashboard( ), G.TimeSeries( title="Total Retries", - dataSource="Prometheus", + description="Total number of retries seen by task.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_task_error_metrics_total_retries{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + + "}", legendFormat="{{connector}}[{{task}}]", ), ], @@ -908,16 +842,13 @@ def dashboard( ), G.TimeSeries( title="Dead Letter Topic Requests", - dataSource="Prometheus", + description="Number of produce requests to dead letter topics.", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_task_error_metrics_deadletterqueue_produce_requests{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + 
server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + + "}", legendFormat="{{connector}}[{{task}}]", ), ], @@ -937,64 +868,46 @@ def dashboard( ), ] + ## Source tasks: source_base = task_errors_base + 2 source_inner = [ G.TimeSeries( - title="Poll Batch Avg. Latency", - dataSource="Prometheus", + title="Poll Batch Latency", + description="Average and Maximum time in milliseconds taken by this task to poll for a batch of source records", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_source_task_metrics_poll_batch_avg_time_ms{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (avg.)", ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=source_base - ), - ), - G.TimeSeries( - title="Poll Batch Max. 
Latency", - dataSource="Prometheus", - targets=[ G.Target( expr="kafka_connect_source_task_metrics_poll_batch_max_time_ms{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (max.)", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=source_base + h=default_height * 2, w=ts_width, x=ts_width * 0, y=source_base ), ), G.TimeSeries( title="Source Record Poll Rate", - dataSource="Prometheus", + description="""Before transformations are applied, + this is the average per-second number of records produced or + polled by the task belonging to the named source connector in the worker + """, + dataSource=ds, targets=[ G.Target( expr="kafka_connect_source_task_metrics_source_record_poll_rate{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + + "}", legendFormat="{{connector}}[{{task}}]", ), ], @@ -1002,21 +915,22 @@ def dashboard( legendCalcs=["max", "mean", "last"], unit="ops", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=source_base + 1 + h=default_height * 2, w=ts_width, x=ts_width * 1, y=source_base ), ), G.TimeSeries( title="Source Record Write Rate", - dataSource="Prometheus", + description="""After transformations are applied, + this is the average per-second number of records output from the transformations and + written to Kafka for the task belonging to the named source connector in the worker + (excludes any records filtered out by the transformations) + """, + dataSource=ds, targets=[ G.Target( expr="kafka_connect_source_task_metrics_source_record_write_rate{" - + env_label - + '="$env",' - + 
connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + + "}", legendFormat="{{connector}}[{{task}}]", ), ], @@ -1024,7 +938,7 @@ def dashboard( legendCalcs=["max", "mean", "last"], unit="ops", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=source_base + 1 + h=default_height * 2, w=ts_width, x=ts_width * 2, y=source_base ), ), ] @@ -1037,21 +951,24 @@ def dashboard( ), ] - sink_base = source_base + 2 + ## Sink tasks: + sink_base = source_base + 1 sink_inner = [ G.TimeSeries( - title="Put Batch Avg. Latency", - dataSource="Prometheus", + title="Put Batch Latency", + dataSource=ds, targets=[ G.Target( expr="kafka_connect_sink_task_metrics_put_batch_avg_time_ms{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', - legendFormat="{{connector}}[{{task}}]", + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (avg.)", + ), + G.Target( + expr="kafka_connect_sink_task_metrics_put_batch_max_time_ms{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}] (max.)", ), ], legendDisplayMode="table", @@ -1062,39 +979,61 @@ def dashboard( ), ), G.TimeSeries( - title="Put Batch Max. 
Latency", - dataSource="Prometheus", + title="Sink Record Read Rate", + description="""Before transformations are applied, + this is the average per-second number of records read from Kafka + for the task belonging to the named sink connector in the worker + """, + dataSource=ds, targets=[ G.Target( - expr="kafka_connect_sink_task_metrics_put_batch_max_time_ms{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + expr="kafka_connect_sink_task_metrics_sink_record_read_rate{" + + by_connector + + "}", legendFormat="{{connector}}[{{task}}]", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - unit="ms", + unit="ops", gridPos=G.GridPos( h=default_height * 2, w=ts_width, x=ts_width * 1, y=sink_base ), ), + G.TimeSeries( + title="Sink Record Send Rate", + description="""After transformations are applied, + this is the average per-second number of records output from the transformations and + sent to the task belonging to the named sink connector in the worker + (excludes any records filtered out by the transformations) + """, + dataSource=ds, + targets=[ + G.Target( + expr="kafka_connect_sink_task_metrics_sink_record_send_rate{" + + by_connector + + "}", + legendFormat="{{connector}}[{{task}}]", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ops", + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=ts_width * 2, y=sink_base + ), + ), G.TimeSeries( title="Partition Count", - dataSource="Prometheus", + description="""Number of topic partitions assigned to the task and + which belong to the named sink connector in the worker + """, + dataSource=ds, targets=[ G.Target( expr="kafka_connect_sink_task_metrics_partition_count{" - + env_label - + '="$env",' - + connect_cluster_label - + '="$connect_cluster",' - + server_label - + '=~"$connect_worker",connector=~"$connector"}', + + by_connector + + "}", 
legendFormat="{{connector}}[{{task}}]", ), ], @@ -1114,8 +1053,9 @@ def dashboard( ), ] + # group all panels panels = ( - hc_panels + overview_panels + system_panels + tasks_panels + task_errors_panels @@ -1124,6 +1064,7 @@ def dashboard( + worker_panels ) + # build dashboard return G.Dashboard( title="Kafka Connect cluster - v2", description="Overview of the Kafka Connect cluster", @@ -1143,9 +1084,13 @@ def dashboard( ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") connect_cluster_label = os.environ.get( "CONNECT_CLUSTER_LABEL", "kafka_connect_cluster_id" ) -dashboard = dashboard(env_label, server_label, connect_cluster_label) + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label, connect_cluster_label) diff --git a/grafana-dashboards/ksqldb-cluster.py b/grafana-dashboards/ksqldb-cluster.py index ea3c1443..3caeab89 100644 --- a/grafana-dashboards/ksqldb-cluster.py +++ b/grafana-dashboards/ksqldb-cluster.py @@ -2,47 +2,80 @@ import grafanalib.core as G -def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="app"): +def dashboard( + ds="Prometheus", + env_label="namespace", + server_label="pod", + ksqldb_cluster_label="app", +): + """ + ksqlDB cluster dashboard + It includes: + - Cluster overview + - System resources + - Query Performance + - State Stores + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. 
+ + Invariants: + - Max width: 24 + """ + + # Default sizes default_height = 5 stat_width = 4 ts_width = 8 + # Queries + by_env = env_label + '="$env"' + by_cluster = by_env + "," + ksqldb_cluster_label + '="$ksqldb_cluster"' + by_server = by_cluster + "," + server_label + '=~"$ksqldb_server"' + by_thread = by_server + ',thread_id=~".+$ksqldb_cluster_id.+"'  # leading comma separates this matcher from by_server + + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="ksqldb_cluster", label="ksqlDB cluster", - dataSource="Prometheus", + dataSource=ds, query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" - + env_label - + '="$env"},' + + by_env + + "}," + ksqldb_cluster_label + ")", ), G.Template( name="ksqldb_cluster_id", label="ksqlDB cluster ID", - dataSource="Prometheus", + dataSource=ds, query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" - + env_label - + '="$env"},ksql_cluster)', + + by_env + + "},ksql_cluster)", hide=2, # true ), G.Template( name="ksqldb_server", label="ksqlDB server", - dataSource="Prometheus", + dataSource=ds, query="label_values(ksql_ksql_engine_query_stats_num_active_queries{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster"}, ' + + by_cluster + + "}, " + server_label + ")", multi=True, @@ -51,24 +84,27 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ] ) - hc_base = 0 - hc_panels = [ + # Panel groups + ## Cluster overview: + ### When updating descriptions on these panels, also update descriptions in confluent-platform.py + overview_base = 0 + overview_panels = [ G.RowPanel( title="Overview", - gridPos=G.GridPos(h=1, w=24, x=0, y=hc_base), + gridPos=G.GridPos(h=1, w=24, x=0, y=overview_base), ), + + # First layer G.Stat( title="ksqlDB: Online Servers", description="""ksqlDB online instances returning metrics. 
""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="count(ksql_ksql_engine_query_stats_num_active_queries{" - + env_label - + '="$env", ' - + ksqldb_cluster_label - + '="$ksqldb_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -76,21 +112,19 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a G.Threshold(index=0, value=0.0, color="blue"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 0, y=hc_base + h=default_height, w=stat_width, x=stat_width * 0, y=overview_base ), ), G.Stat( title="ksqlDB: Sum of Active Queries", description="""Number of active queries deployed in the cluster. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_num_active_queries{" - + env_label - + '="$env", ' - + ksqldb_cluster_label - + '="$ksqldb_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -98,7 +132,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a G.Threshold(index=0, value=0.0, color="blue"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 1, y=hc_base + h=default_height, w=stat_width, x=stat_width * 1, y=overview_base ), ), G.Stat( @@ -106,14 +140,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a description="""Number of running queries deployed in the cluster. Ideally, this number should be equal to the number of active queries as queries should be running. 
""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_running_queries{" - + env_label - + '="$env", ' - + ksqldb_cluster_label - + '="$ksqldb_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -122,7 +154,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a G.Threshold(index=1, value=1.0, color="green"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 2, y=hc_base + h=default_height, w=stat_width, x=stat_width * 2, y=overview_base ), ), G.Stat( @@ -131,14 +163,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a Ideally, this number should be equal zero, or return to zero in a short period (e.g. 1 minute). It's recommended to alert if the number of rebalancing queries stay higher than 0 for a longer period of time. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum(ksql_ksql_engine_query_stats_rebalancing_queries{" - + env_label - + '="$env", ' - + ksqldb_cluster_label - + '="$ksqldb_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -147,7 +177,7 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a G.Threshold(index=1, value=1.0, color="yellow"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 3, y=hc_base + h=default_height, w=stat_width, x=stat_width * 3, y=overview_base ), ), G.Stat( @@ -156,14 +186,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a Ideally, this number should be equal zero. It's recommended to alert if the number of queries failed is higher than 0. 
""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="avg(ksql_ksql_engine_query_stats_error_queries{" - + env_label - + '="$env", ' - + ksqldb_cluster_label - + '="$ksqldb_cluster"})', + + by_cluster + + "})", ), ], reduceCalc="last", @@ -172,36 +200,36 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a G.Threshold(index=1, value=1.0, color="red"), ], gridPos=G.GridPos( - h=default_height, w=stat_width, x=stat_width * 4, y=hc_base + h=default_height, w=stat_width, x=stat_width * 4, y=overview_base ), ), + + # Second layer G.TimeSeries( title="Cluster Liveness", - dataSource="Prometheus", + description="A metric with constant value 1 indicating the server is up and emitting metrics.", + dataSource=ds, targets=[ G.Target( expr="ksql_ksql_engine_query_stats_liveness_indicator{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster"}', + + by_cluster + + "}", legendFormat="{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=0, y=hc_base + 1), + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=0, y=overview_base + 1), ), G.TimeSeries( title="Messages consumed/sec", - dataSource="Prometheus", + description="The number of messages consumed per second across all queries.", + dataSource=ds, targets=[ G.Target( expr="ksql_ksql_engine_query_stats_messages_consumed_per_sec{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster"}', + + by_cluster + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -209,19 +237,18 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a legendCalcs=["max", "mean", "last"], unit="cps", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=hc_base + 1 + h=default_height * 2, w=ts_width, x=ts_width * 1, y=overview_base + 1 ), ), G.TimeSeries( title="Messages produced/sec", - 
dataSource="Prometheus", + description="The number of messages produced per second across all queries.", + dataSource=ds, targets=[ G.Target( expr="ksql_ksql_engine_query_stats_messages_produced_per_sec{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster"}', + + by_cluster + + "}", legendFormat="{{" + server_label + "}}", ), ], @@ -229,14 +256,14 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a legendCalcs=["max", "mean", "last"], unit="cps", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 2, y=hc_base + 1 + h=default_height * 2, w=ts_width, x=ts_width * 2, y=overview_base + 1 ), ), ] ## System resources: ### When updating descriptions on these panels, also update descriptions in other cluster dashboards - system_base = hc_base + 2 + system_base = overview_base + 2 system_panels = [ G.RowPanel( title="System", @@ -247,16 +274,10 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a description="""Rate of CPU seconds used by the Java process. 100% usage represents one core. If there are multiple cores, the total capacity should be 100% * number_cores.""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( - expr="irate(process_cpu_seconds_total{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server"}[5m])', + expr="irate(process_cpu_seconds_total{" + by_server + "}[5m])", legendFormat="{{" + server_label + "}}", ), ], @@ -270,16 +291,10 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a G.TimeSeries( title="Memory usage", description="""Sum of JVM memory used, without including areas (e.g. 
heap size).""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( - expr="sum without(area)(jvm_memory_bytes_used{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server"})', + expr="sum without(area)(jvm_memory_bytes_used{" + by_server + "})", legendFormat="{{" + server_label + "}}", ), ], @@ -293,16 +308,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a G.TimeSeries( title="GC collection", description="""Sum of seconds used by Garbage Collection.""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server"}[5m]))', + + by_server + + "}[5m]))", legendFormat="{{" + server_label + "}}", ), ], @@ -315,20 +326,17 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), ] + ## Query performance queries_base = system_base + 1 queries_inner = [ G.TimeSeries( title="Poll Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_poll_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -341,16 +349,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Poll Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_poll_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -363,16 +367,12 @@ def 
dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Process Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_process_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -385,16 +385,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Process Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_process_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -407,16 +403,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Commit Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_commit_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -429,16 +421,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Commit Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_commit_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -451,16 +439,12 @@ def 
dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Punctuate Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_punctuate_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -473,16 +457,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Punctuate Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_punctuate_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -503,20 +483,17 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), ] + ## State stores: stores_base = queries_base + 4 stores_inner = [ G.TimeSeries( title="Put Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_rate{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -529,16 +506,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", 
legendFormat="{{thread_id}}", ), ], @@ -551,16 +524,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -573,16 +542,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put if absent Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_if_absent_rate{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -595,16 +560,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put if absent Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_if_absent_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -617,16 +578,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Put if absent Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_put_if_absent_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + 
"}", legendFormat="{{thread_id}}", ), ], @@ -639,16 +596,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Fetch Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_fetch_rate{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -661,16 +614,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Fetch Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_fetch_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -683,16 +632,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Fetch Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_fetch_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -705,16 +650,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Delete Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_delete_rate{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -727,16 
+668,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Delete Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_delete_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -749,16 +686,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Delete Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_delete_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -771,16 +704,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Restore Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_restore_rate{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -793,16 +722,12 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Restore Latency (Avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_restore_latency_avg{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -815,16 +740,12 @@ def 
dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), G.TimeSeries( title="Restore Latency (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_state_metrics_restore_latency_max{" - + env_label - + '="$env",' - + ksqldb_cluster_label - + '="$ksqldb_cluster",' - + server_label - + '=~"$ksqldb_server",thread_id=~".+$ksqldb_cluster_id.+"}', + + by_thread + + "}", legendFormat="{{thread_id}}", ), ], @@ -845,8 +766,10 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ), ] - panels = hc_panels + system_panels + queries_panels + stores_panels + # group all panels + panels = overview_panels + system_panels + queries_panels + stores_panels + # build dashboard return G.Dashboard( title="ksqlDB cluster - v2", description="Overview of ksqlDB clusters.", @@ -869,7 +792,11 @@ def dashboard(env_label="namespace", server_label="pod", ksqldb_cluster_label="a ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") ksqldb_cluster_label = os.environ.get("KSQLDB_CLUSTER_LABEL", "ksqldb_cluster_id") -dashboard = dashboard(env_label, server_label, ksqldb_cluster_label) + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label, ksqldb_cluster_label) diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py index 233df469..023f0f37 100644 --- a/grafana-dashboards/schema-registry-cluster.py +++ b/grafana-dashboards/schema-registry-cluster.py @@ -2,26 +2,52 @@ import grafanalib.core as G -def dashboard(env_label="namespace", server_label="pod"): +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Schema Registry cluster dashboard + It includes: + - Cluster overview + - System resources + + Structure: + - Default sizes + - 
Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + + # Default sizes default_height = 5 stat_width = 4 ts_width = 8 + # Queries + by_env = env_label + '="$env"' + by_server = by_env + "," + server_label + '=~"$sr_server"' + + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="sr_server", label="Server", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_schema_registry_registered_count{" - + env_label - + '="$env"}, ' + + by_env + + "}, " + server_label + ")", multi=True, @@ -30,6 +56,9 @@ def dashboard(env_label="namespace", server_label="pod"): ] ) + # Panel groups + ## Cluster overview: + ### When updating descriptions on these panels, also update descriptions in confluent-platform.py healthcheck_base = 0 healthcheck_panels = [ G.RowPanel( @@ -40,12 +69,12 @@ def dashboard(env_label="namespace", server_label="pod"): title="SR: Online instances", description="""Schema Registry online instances returning metrics. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="count(kafka_schema_registry_registered_count{" - + env_label - + '="$env"})', + + by_env + + "})", ), ], reduceCalc="last", @@ -62,12 +91,10 @@ def dashboard(env_label="namespace", server_label="pod"): title="SR: Registered Schemas (avg.)", description="""Average number of registered schemas across the cluster.
""", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( - expr="avg(kafka_schema_registry_registered_count{" - + env_label - + '="$env"})', + expr="avg(kafka_schema_registry_registered_count{" + by_env + "})", ), ], reduceCalc="last", @@ -82,12 +109,12 @@ def dashboard(env_label="namespace", server_label="pod"): title="SR: Created Schemas by Type (avg.)", description="""Average number of schemas created, by type. """, - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="avg(kafka_schema_registry_schemas_created{" - + env_label - + '="$env"}) by (schema_type)', + + by_env + + "}) by (schema_type)", legendFormat="{{schema_type}}", ), ], @@ -101,12 +128,12 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="SR: Sum of Deleted Schemas by Type", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum(kafka_schema_registry_schemas_deleted{" - + env_label - + '="$env"}) by (schema_type)', + + by_env + + "}) by (schema_type)", legendFormat="{{schema_type}}", ), ], @@ -120,10 +147,13 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="SR: Sum of Active Connections", - dataSource="Prometheus", + description="Number of active connections", + dataSource=ds, targets=[ G.Target( - expr="sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", + expr="sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count{" + + by_env + + "})", ), ], reduceCalc="last", @@ -136,6 +166,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## System resources: system_panels = [ G.RowPanel( title="System", @@ -143,14 +174,10 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="CPU usage", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( - expr="irate(process_cpu_seconds_total{" - + env_label - + '="$env",' - + server_label - + '=~"$sr_server"}[5m])', + expr="irate(process_cpu_seconds_total{" + 
by_server + "}[5m])", legendFormat="{{" + server_label + "}}", ), ], @@ -161,14 +188,10 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Memory usage", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( - expr="sum without(area)(jvm_memory_bytes_used{" - + env_label - + '="$env",' - + server_label - + '=~"$sr_server"})', + expr="sum without(area)(jvm_memory_bytes_used{" + by_server + "})", legendFormat="{{" + server_label + "}}", ), ], @@ -179,14 +202,12 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="GC collection", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="sum without(gc)(irate(jvm_gc_collection_seconds_sum{" - + env_label - + '="$env",' - + server_label - + '=~"$sr_server"}[5m]))', + + by_server + + "}[5m]))", legendFormat="{{" + server_label + "}}", ), ], @@ -197,8 +218,10 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + # group all panels panels = healthcheck_panels + system_panels + # build dashboard return G.Dashboard( title="Schema Registry cluster - v2", description="Overview of the Schema Registry cluster", @@ -218,6 +241,10 @@ def dashboard(env_label="namespace", server_label="pod"): ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") -dashboard = dashboard(env_label, server_label) + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json index d2e12193..d403d74c 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json +++ 
b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Overview", + "title": "Cluster Overview", "transformations": [], "transparent": false, "type": "row" @@ -589,7 +589,7 @@ }, "columns": [], "datasource": "Prometheus", - "description": null, + "description": "Connect workers metadata and stats.\n ", "editable": true, "error": false, "fieldConfig": { @@ -809,7 +809,7 @@ }, "columns": [], "datasource": "Prometheus", - "description": null, + "description": "Connectors deployed and task stats.\n ", "editable": true, "error": false, "fieldConfig": { @@ -963,7 +963,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "How much time the connector tasks are in running state.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1063,7 +1063,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Average time spent on rebalance state.\n ", "editable": true, "error": false, "fieldConfig": { @@ -1155,7 +1155,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Rebalance Latency", + "title": "Rebalance Latency (avg.)", "transformations": [], "transparent": false, "type": "timeseries" @@ -1534,7 +1534,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Maximum and average size of the batches processed by the connector task.", "editable": true, "error": false, "fieldConfig": { @@ -1617,98 +1617,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Batch Size (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus",
- "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_connect_connector_task_metrics_batch_size_max{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", @@ -1717,7 +1631,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (max.)", "metric": "", "refId": "", "step": 10, @@ -1726,7 +1640,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Batch Size (Max.)", + "title": "Batch Size", "transformations": [], "transparent": false, "type": "timeseries" @@ -1734,7 +1648,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Percentage of offset commit successful and failed.", "editable": true, "error": false, 
"fieldConfig": { @@ -1781,11 +1695,11 @@ "h": 10, "w": 8, "x": 0, - "y": 7 + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 19, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, @@ -1817,7 +1731,21 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (success)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_failure_percentage{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (failure)", "metric": "", "refId": "", "step": 10, @@ -1826,7 +1754,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Offset commit success %", + "title": "Offset commit", "transformations": [], "transparent": false, "type": "timeseries" @@ -1834,7 +1762,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Average and Maximum time in milliseconds taken by the task to commit offsets", "editable": true, "error": false, "fieldConfig": { @@ -1880,12 +1808,12 @@ "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 7 + "x": 16, + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 20, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, @@ -1917,7 +1845,21 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_connector_task_metrics_offset_commit_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": 
"time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", "metric": "", "refId": "", "step": 10, @@ -1926,7 +1868,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Offset commit avg. latency", + "title": "Offset commit latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -1962,11 +1904,11 @@ "h": 1, "w": 24, "x": 0, - "y": 8 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 21, + "id": 20, "interval": null, "links": [], "maxDataPoints": 100, @@ -1976,7 +1918,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Total number of failures seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2023,11 +1965,11 @@ "h": 10, "w": 8, "x": 0, - "y": 8 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 22, + "id": 21, "interval": null, "links": [], "maxDataPoints": 100, @@ -2076,7 +2018,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Total number of errors seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2123,11 +2065,11 @@ "h": 10, "w": 8, "x": 8, - "y": 8 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 23, + "id": 22, "interval": null, "links": [], "maxDataPoints": 100, @@ -2176,7 +2118,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Total number of records skipped seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2223,11 +2165,11 @@ "h": 10, "w": 8, "x": 16, - "y": 8 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 24, + "id": 23, "interval": null, "links": [], "maxDataPoints": 100, @@ -2276,7 +2218,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Total number of records logged seen by task.", "editable": true, "error": false, "fieldConfig": { @@ 
-2323,11 +2265,11 @@ "h": 10, "w": 8, "x": 0, - "y": 9 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 25, + "id": 24, "interval": null, "links": [], "maxDataPoints": 100, @@ -2376,7 +2318,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Total number of retries seen by task.", "editable": true, "error": false, "fieldConfig": { @@ -2423,11 +2365,11 @@ "h": 10, "w": 8, "x": 8, - "y": 9 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 26, + "id": 25, "interval": null, "links": [], "maxDataPoints": 100, @@ -2476,7 +2418,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Number of produce requests to dead letter topics.", "editable": true, "error": false, "fieldConfig": { @@ -2523,11 +2465,11 @@ "h": 10, "w": 8, "x": 16, - "y": 9 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 27, + "id": 26, "interval": null, "links": [], "maxDataPoints": 100, @@ -2604,11 +2546,11 @@ "h": 1, "w": 24, "x": 0, - "y": 10 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 28, + "id": 27, "interval": null, "links": [], "maxDataPoints": 100, @@ -2618,7 +2560,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Average and Maximum time in milliseconds taken by this task to poll for a batch of source records", "editable": true, "error": false, "fieldConfig": { @@ -2665,11 +2607,11 @@ "h": 10, "w": 8, "x": 0, - "y": 10 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 29, + "id": 28, "interval": null, "links": [], "maxDataPoints": 100, @@ -2701,7 +2643,21 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": 
"kafka_connect_source_task_metrics_poll_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", "metric": "", "refId": "", "step": 10, @@ -2710,7 +2666,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Avg. Latency", + "title": "Poll Batch Latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -2718,7 +2674,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Before transformations are applied, \n this is the average per-second number of records produced or \n polled by the task belonging to the named source connector in the worker\n ", "editable": true, "error": false, "fieldConfig": { @@ -2757,7 +2713,7 @@ "mode": "absolute", "steps": [] }, - "unit": "ms" + "unit": "ops" }, "overrides": [] }, @@ -2765,11 +2721,11 @@ "h": 10, "w": 8, "x": 8, - "y": 10 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 30, + "id": 29, "interval": null, "links": [], "maxDataPoints": 100, @@ -2795,7 +2751,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_poll_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2810,7 +2766,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Poll Batch Max. 
Latency", + "title": "Source Record Poll Rate", "transformations": [], "transparent": false, "type": "timeseries" @@ -2818,7 +2774,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "After transformations are applied, \n this is the average per-second number of records output from the transformations and \n written to Kafka for the task belonging to the named source connector in the worker \n (excludes any records filtered out by the transformations)\n ", "editable": true, "error": false, "fieldConfig": { @@ -2864,12 +2820,12 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 11 + "x": 16, + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 31, + "id": 30, "interval": null, "links": [], "maxDataPoints": 100, @@ -2895,7 +2851,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_source_record_poll_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_source_task_metrics_source_record_write_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -2910,11 +2866,53 @@ ], "timeFrom": null, "timeShift": null, - "title": "Source Record Poll Rate", + "title": "Source Record Write Rate", "transformations": [], "transparent": false, "type": "timeseries" - }, + } + ], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Source Tasks", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "collapsed": true, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + 
"height": null, + "hideTimeOverride": false, + "id": 31, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [ { "cacheTimeout": null, "datasource": "Prometheus", @@ -2957,15 +2955,15 @@ "mode": "absolute", "steps": [] }, - "unit": "ops" + "unit": "ms" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 11 + "x": 0, + "y": 10 }, "height": null, "hideTimeOverride": false, @@ -2995,13 +2993,27 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_source_task_metrics_source_record_write_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{connector}}[{{task}}]", + "legendFormat": "{{connector}}[{{task}}] (avg.)", + "metric": "", + "refId": "", + "step": 10, + "target": "" + }, + { + "datasource": null, + "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{connector}}[{{task}}] (max.)", "metric": "", "refId": "", "step": 10, @@ -3010,57 +3022,15 @@ ], "timeFrom": null, "timeShift": null, - "title": "Source Record Write Rate", + "title": "Put Batch Latency", "transformations": [], "transparent": false, "type": "timeseries" - } - ], - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [], - "timeFrom": null, - "timeShift": null, - "title": "Source Tasks", - "transformations": [], - "transparent": false, - "type": "row" - }, - { - "cacheTimeout": null, - 
"collapsed": true, - "datasource": null, - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [] - } - } - }, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 12 - }, - "height": null, - "hideTimeOverride": false, - "id": 33, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "panels": [ + }, { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Before transformations are applied, \n this is the average per-second number of records read from Kafka \n for the task belonging to the named sink connector in the worker\n ", "editable": true, "error": false, "fieldConfig": { @@ -3099,19 +3069,19 @@ "mode": "absolute", "steps": [] }, - "unit": "ms" + "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 12 + "x": 8, + "y": 10 }, "height": null, "hideTimeOverride": false, - "id": 34, + "id": 33, "interval": null, "links": [], "maxDataPoints": 100, @@ -3137,7 +3107,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_avg_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_sink_record_read_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -3152,7 +3122,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Avg. 
Latency", + "title": "Sink Record Read Rate", "transformations": [], "transparent": false, "type": "timeseries" @@ -3160,7 +3130,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "After transformations are applied, \n this is the average per-second number of records output from the transformations and \n sent to the task belonging to the named sink connector in the worker \n (excludes any records filtered out by the transformations)\n ", "editable": true, "error": false, "fieldConfig": { @@ -3199,19 +3169,19 @@ "mode": "absolute", "steps": [] }, - "unit": "ms" + "unit": "ops" }, "overrides": [] }, "gridPos": { "h": 10, "w": 8, - "x": 8, - "y": 12 + "x": 16, + "y": 10 }, "height": null, "hideTimeOverride": false, - "id": 35, + "id": 34, "interval": null, "links": [], "maxDataPoints": 100, @@ -3237,7 +3207,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_connect_sink_task_metrics_put_batch_max_time_ms{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", + "expr": "kafka_connect_sink_task_metrics_sink_record_send_rate{env=\"$env\",kafka_connect_cluster_id=\"$connect_cluster\",hostname=~\"$connect_worker\",connector=~\"$connector\"}", "format": "time_series", "hide": false, "instant": false, @@ -3252,7 +3222,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Put Batch Max. 
Latency", + "title": "Sink Record Send Rate", "transformations": [], "transparent": false, "type": "timeseries" @@ -3260,7 +3230,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Number of topic partitions assigned to the task and \n which belong to the named sink connector in the worker\n ", "editable": true, "error": false, "fieldConfig": { @@ -3307,11 +3277,11 @@ "h": 10, "w": 8, "x": 0, - "y": 13 + "y": 11 }, "height": null, "hideTimeOverride": false, - "id": 36, + "id": 35, "interval": null, "links": [], "maxDataPoints": 100, @@ -3392,7 +3362,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 37, + "id": 36, "interval": null, "links": [], "maxDataPoints": 100, @@ -3402,7 +3372,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Incoming byte rate per second per worker.", "editable": true, "error": false, "fieldConfig": { @@ -3453,7 +3423,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 38, + "id": 37, "interval": null, "links": [], "maxDataPoints": 100, @@ -3502,7 +3472,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Outgoing byte rate per second per worker.", "editable": true, "error": false, "fieldConfig": { @@ -3553,7 +3523,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 39, + "id": 38, "interval": null, "links": [], "maxDataPoints": 100, @@ -3602,7 +3572,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Fraction of time the I/O thread spent doing I/O", "editable": true, "error": false, "fieldConfig": { @@ -3653,7 +3623,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 40, + "id": 39, "interval": null, "links": [], "maxDataPoints": 100, @@ -3702,7 +3672,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Average number of network operations (reads or writes) on all connections per 
second", "editable": true, "error": false, "fieldConfig": { @@ -3753,7 +3723,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 41, + "id": 40, "interval": null, "links": [], "maxDataPoints": 100, @@ -3802,7 +3772,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Number of active connections", "editable": true, "error": false, "fieldConfig": { @@ -3853,7 +3823,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 42, + "id": 41, "interval": null, "links": [], "maxDataPoints": 100, @@ -3902,7 +3872,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Successful and failed authentications per second.", "editable": true, "error": false, "fieldConfig": { @@ -3953,7 +3923,7 @@ }, "height": null, "hideTimeOverride": false, - "id": 43, + "id": 42, "interval": null, "links": [], "maxDataPoints": 100, @@ -4008,7 +3978,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Authentications", + "title": "Rate of Authentication", "transformations": [], "transparent": false, "type": "timeseries" diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json index 0452da9c..9f98004e 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "count(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -206,7 +206,7 @@ "targets": [ { "datasource": null, - "expr":
"sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_num_active_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -297,7 +297,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_running_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -388,7 +388,7 @@ "targets": [ { "datasource": null, - "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "sum(ksql_ksql_engine_query_stats_rebalancing_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -479,7 +479,7 @@ "targets": [ { "datasource": null, - "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\", ksqldb_cluster_id=\"$ksqldb_cluster\"})", + "expr": "avg(ksql_ksql_engine_query_stats_error_queries{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\"})", "format": "time_series", "hide": false, "instant": false, @@ -1250,7 +1250,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_poll_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1350,7 +1350,7 @@ "targets": [ { "datasource": null, - "expr":
"kafka_streams_stream_thread_metrics_poll_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1450,7 +1450,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_process_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1550,7 +1550,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_process_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_process_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1650,7 +1650,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_commit_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1750,7 +1750,7 @@ "targets": [ { "datasource": null, - "expr":
"kafka_streams_stream_thread_metrics_commit_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1850,7 +1850,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -1950,7 +1950,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2092,7 +2092,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2192,7 +2192,7 @@ "targets": [ { "datasource": null, - "expr":
"kafka_streams_stream_state_metrics_put_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2292,7 +2292,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2392,7 +2392,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_if_absent_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2492,7 +2492,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2592,7 +2592,7 @@ "targets": [ { "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_put_if_absent_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_put_if_absent_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2692,7 +2692,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_fetch_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2792,7 +2792,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_fetch_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2892,7 +2892,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_fetch_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -2992,7 +2992,7 @@ "targets": [ { "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_delete_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_delete_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3092,7 +3092,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_delete_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3192,7 +3192,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_delete_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_delete_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3292,7 +3292,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_restore_rate{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3392,7 +3392,7 @@ "targets": [ { "datasource": null, - "expr": 
"kafka_streams_stream_state_metrics_restore_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_restore_latency_avg{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, @@ -3492,7 +3492,7 @@ "targets": [ { "datasource": null, - "expr": "kafka_streams_stream_state_metrics_restore_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\",thread_id=~\".+$ksqldb_cluster_id.+\"}", + "expr": "kafka_streams_stream_state_metrics_restore_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", "format": "time_series", "hide": false, "instant": false, diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json index caf43878..72402c7d 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json @@ -411,7 +411,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "Number of active connections", "editable": true, "error": false, "fieldConfig": { @@ -471,7 +471,7 @@ "targets": [ { "datasource": null, - "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count)", + "expr": "sum(kafka_schema_registry_kafka_schema_registry_metrics_connection_count{env=\"$env\"})", "format": "time_series", "hide": false, "instant": false, @@ -612,7 +612,7 @@ "targets": [ { "datasource": null, - "expr": 
"irate(process_cpu_seconds_total{env=\"$env\",hostname=~\"$sr_server\"}[5m])", + "expr": "irate(process_cpu_seconds_total{env=\"$env\",hostname=\"$sr_server\"}[5m])", "format": "time_series", "hide": false, "instant": false, @@ -712,7 +712,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=~\"$sr_server\"})", + "expr": "sum without(area)(jvm_memory_bytes_used{env=\"$env\",hostname=\"$sr_server\"})", "format": "time_series", "hide": false, "instant": false, @@ -812,7 +812,7 @@ "targets": [ { "datasource": null, - "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=~\"$sr_server\"}[5m]))", + "expr": "sum without(gc)(irate(jvm_gc_collection_seconds_sum{env=\"$env\",hostname=\"$sr_server\"}[5m]))", "format": "time_series", "hide": false, "instant": false, From 797f092b99a64ec7204d850bd9f1e245c2e682da Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Tue, 12 Jul 2022 21:25:16 +0100 Subject: [PATCH 26/28] docs: clients and topics --- .../grafana/confluent-platform.json | 2 +- .../grafana/kafka-cluster.json | 2 +- .../grafana/kafka-connect-cluster.json | 2 +- .../grafana/kafka-consumer.json | 192 ++++---- .../grafana/kafka-producer.json | 166 +++---- .../grafana/kafka-quotas.json | 34 +- .../grafana/kafka-topics.json | 225 +++------ .../grafana/ksqldb-cluster.json | 456 +++-------------- .../grafana/schema-registry-cluster.json | 2 +- .../grafana/zookeeper-cluster.json | 2 +- grafana-dashboards/confluent-platform.py | 2 +- grafana-dashboards/kafka-cluster.py | 2 +- grafana-dashboards/kafka-connect-cluster.py | 2 +- grafana-dashboards/kafka-consumer.py | 458 ++++++++---------- grafana-dashboards/kafka-producer.py | 391 +++++++-------- grafana-dashboards/kafka-quotas.py | 105 ++-- grafana-dashboards/kafka-topics.py | 217 +++++---- grafana-dashboards/ksqldb-cluster.py | 90 +--- grafana-dashboards/schema-registry-cluster.py | 2 +- 
grafana-dashboards/zookeeper-cluster.py | 2 +- .../dashboards/confluent-platform.json | 2 +- .../dashboards/kafka-cluster.json | 2 +- .../dashboards/kafka-connect-cluster.json | 2 +- .../dashboards/kafka-consumer.json | 104 ++-- .../dashboards/kafka-producer.json | 86 ++-- .../provisioning/dashboards/kafka-quotas.json | 14 +- .../provisioning/dashboards/kafka-topics.json | 207 ++------ .../dashboards/ksqldb-cluster.json | 456 +++-------------- .../dashboards/schema-registry-cluster.json | 2 +- .../dashboards/zookeeper-cluster.json | 2 +- 30 files changed, 1113 insertions(+), 2118 deletions(-) diff --git a/cfk-prometheus-grafana/grafana/confluent-platform.json b/cfk-prometheus-grafana/grafana/confluent-platform.json index d94f8840..55305913 100644 --- a/cfk-prometheus-grafana/grafana/confluent-platform.json +++ b/cfk-prometheus-grafana/grafana/confluent-platform.json @@ -2675,7 +2675,7 @@ ] }, "timezone": "browser", - "title": "Confluent Platform overview - v2", + "title": "Confluent Platform overview", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/kafka-cluster.json b/cfk-prometheus-grafana/grafana/kafka-cluster.json index 3db845de..74c899e8 100644 --- a/cfk-prometheus-grafana/grafana/kafka-cluster.json +++ b/cfk-prometheus-grafana/grafana/kafka-cluster.json @@ -5531,7 +5531,7 @@ ] }, "timezone": "browser", - "title": "Kafka cluster - v2", + "title": "Kafka cluster", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json index 92b49191..39c2d217 100644 --- a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json +++ b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json @@ -4148,7 +4148,7 @@ ] }, "timezone": "browser", - "title": "Kafka Connect cluster - v2", + "title": "Kafka Connect cluster", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/kafka-consumer.json 
b/cfk-prometheus-grafana/grafana/kafka-consumer.json index 342da669..7c7f44a2 100644 --- a/cfk-prometheus-grafana/grafana/kafka-consumer.json +++ b/cfk-prometheus-grafana/grafana/kafka-consumer.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -145,7 +145,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -206,7 +206,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -228,7 +228,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -305,7 +305,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "expr": "topk(10, 
kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -327,7 +327,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -396,7 +396,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -418,7 +418,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -479,7 +479,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_consumer_app_info{namespace=\"$env\", client_id=~\"$client_id\", version!=\"\", pod=~\"$server\"}) by (version)", + "expr": "count(kafka_consumer_app_info{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\", version!=\"\"}) by (version)", "format": "time_series", "hide": false, "instant": false, @@ -531,7 +531,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -609,7 +609,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{namespace=\"$env\", 
client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -631,7 +631,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -709,7 +709,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -731,7 +731,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -809,7 +809,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -831,7 +831,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -909,7 +909,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -931,7 +931,7 @@ }, { "cacheTimeout": null, - 
"datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1009,7 +1009,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1023,7 +1023,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1045,7 +1045,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1123,7 +1123,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1137,7 +1137,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": 
false, "instant": false, @@ -1159,7 +1159,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1237,7 +1237,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1251,7 +1251,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1315,7 +1315,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1393,7 +1393,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1415,7 +1415,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1493,7 +1493,7 @@ "targets": [ { "datasource": null, - "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1515,7 +1515,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1593,7 +1593,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1615,7 +1615,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1693,7 +1693,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1707,7 +1707,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1729,7 
+1729,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1807,7 +1807,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1821,7 +1821,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1843,7 +1843,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1921,7 +1921,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1935,7 +1935,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": 
false, "instant": false, @@ -1957,7 +1957,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2035,7 +2035,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2057,7 +2057,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2135,7 +2135,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2157,7 +2157,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2235,7 +2235,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2257,7 +2257,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", 
"description": null, "editable": true, "error": false, @@ -2335,7 +2335,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2349,7 +2349,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2371,7 +2371,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2449,7 +2449,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2463,7 +2463,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": 
false, @@ -2485,7 +2485,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2563,7 +2563,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2627,7 +2627,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2705,7 +2705,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2727,7 +2727,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2805,7 +2805,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2827,7 +2827,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2905,7 +2905,7 @@ 
"targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2927,7 +2927,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3005,7 +3005,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3027,7 +3027,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3105,7 +3105,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3127,7 +3127,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3205,7 +3205,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_select_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_select_rate{namespace=\"$env\", 
client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3227,7 +3227,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3305,7 +3305,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3327,7 +3327,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3405,7 +3405,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3469,7 +3469,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3547,7 +3547,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3569,7 +3569,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": 
"${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3647,7 +3647,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3669,7 +3669,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3747,7 +3747,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3761,7 +3761,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3783,7 +3783,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3861,7 +3861,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{namespace=\"$env\", 
client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3883,7 +3883,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3961,7 +3961,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4025,7 +4025,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -4103,7 +4103,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4125,7 +4125,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -4203,7 +4203,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4225,7 +4225,7 @@ }, { 
"cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -4303,7 +4303,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4317,7 +4317,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4339,7 +4339,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -4417,7 +4417,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{namespace=\"$env\", client_type=\"consumer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4473,7 +4473,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Environment", @@ -4500,7 +4500,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Server", @@ -4527,7 +4527,7 @@ "text": 
null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Client ID", @@ -4576,7 +4576,7 @@ ] }, "timezone": "browser", - "title": "Kafka Consumer - v2", + "title": "Kafka Consumer", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/kafka-producer.json b/cfk-prometheus-grafana/grafana/kafka-producer.json index 08afbea0..ff50cd48 100644 --- a/cfk-prometheus-grafana/grafana/kafka-producer.json +++ b/cfk-prometheus-grafana/grafana/kafka-producer.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -145,7 +145,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -214,7 +214,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -236,7 +236,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -313,7 +313,7 @@ "targets": [ { "datasource": null, - 
"expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", pod=~\"$server\"} > 0)", + "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -335,7 +335,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -396,7 +396,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_producer_app_info{namespace=\"$env\", client_id=~\"$client_id\", version!=\"\", pod=~\"$server\"}) by (version)", + "expr": "count(kafka_producer_app_info{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\",version!=\"\"}) by (version)", "format": "time_series", "hide": false, "instant": false, @@ -448,7 +448,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -526,7 +526,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -548,7 +548,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -626,7 +626,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": 
"topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -648,7 +648,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -726,7 +726,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -748,7 +748,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -826,7 +826,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_request_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_request_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -848,7 +848,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -926,7 +926,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -948,7 +948,7 @@ }, { "cacheTimeout": null, - "datasource": 
"Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1026,7 +1026,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1048,7 +1048,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1126,7 +1126,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1148,7 +1148,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1226,7 +1226,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1248,7 +1248,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1326,7 +1326,7 @@ "targets": [ { "datasource": null, - "expr": 
"topk(10,kafka_producer_producer_metrics_record_error_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1348,7 +1348,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1426,7 +1426,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1440,7 +1440,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1462,7 +1462,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1540,7 +1540,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1554,7 +1554,7 @@ }, { "datasource": null, - "expr": 
"topk(10,kafka_producer_producer_metrics_record_queue_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1576,7 +1576,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1654,7 +1654,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1668,7 +1668,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1690,7 +1690,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1768,7 +1768,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1782,7 +1782,7 @@ }, { "datasource": null, - 
"expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1804,7 +1804,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1882,7 +1882,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1904,7 +1904,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -1982,7 +1982,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2046,7 +2046,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2124,7 +2124,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_count{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_connection_count{namespace=\"$env\", 
client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2146,7 +2146,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2224,7 +2224,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2246,7 +2246,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2324,7 +2324,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2346,7 +2346,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2424,7 +2424,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2446,7 +2446,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", 
"description": null, "editable": true, "error": false, @@ -2524,7 +2524,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2546,7 +2546,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2624,7 +2624,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_select_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_select_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2646,7 +2646,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2724,7 +2724,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2746,7 +2746,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2824,7 +2824,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": 
"topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2888,7 +2888,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -2966,7 +2966,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2988,7 +2988,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3066,7 +3066,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3088,7 +3088,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3166,7 +3166,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, 
"instant": false, @@ -3180,7 +3180,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3202,7 +3202,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3280,7 +3280,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3302,7 +3302,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3380,7 +3380,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3444,7 +3444,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3522,7 +3522,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_byte_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": 
"topk(10,kafka_producer_producer_topic_metrics_byte_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3544,7 +3544,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3622,7 +3622,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3644,7 +3644,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3722,7 +3722,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3744,7 +3744,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3822,7 +3822,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3844,7 
+3844,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -3922,7 +3922,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{namespace=\"$env\",client_id=~\"$client_id\", pod=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{namespace=\"$env\", client_type=\"producer\",pod=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3978,7 +3978,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Environment", @@ -4005,7 +4005,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Server", @@ -4032,7 +4032,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Client ID", @@ -4081,7 +4081,7 @@ ] }, "timezone": "browser", - "title": "Kafka Producer - v2", + "title": "Kafka Producer", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/kafka-quotas.json b/cfk-prometheus-grafana/grafana/kafka-quotas.json index 08caa085..17ba4685 100644 --- a/cfk-prometheus-grafana/grafana/kafka-quotas.json +++ b/cfk-prometheus-grafana/grafana/kafka-quotas.json @@ -21,7 +21,7 @@ "panels": [ { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -99,7 +99,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_produce_byte_rate{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"})", + "expr": "topk(10,kafka_server_produce_byte_rate{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"})", "format": 
"time_series", "hide": false, "instant": false, @@ -121,7 +121,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -199,7 +199,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_fetch_byte_rate{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"})", + "expr": "topk(10,kafka_server_fetch_byte_rate{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -221,7 +221,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -299,7 +299,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_request_request_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"})", + "expr": "topk(10,kafka_server_request_request_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -321,7 +321,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -399,7 +399,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_produce_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"} > 0)", + "expr": "topk(10,kafka_server_produce_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -421,7 +421,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -499,7 +499,7 @@ "targets": [ { "datasource": null, - "expr": 
"topk(10,kafka_server_fetch_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"} > 0)", + "expr": "topk(10,kafka_server_fetch_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -521,7 +521,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -599,7 +599,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_request_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\", pod=~\"$broker\"} > 0)", + "expr": "topk(10,kafka_server_request_throttle_time{namespace=\"$env\",user=~\"$user\",client_id=~\"$client_id\",pod=~\"$broker\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -643,7 +643,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Environment", @@ -670,7 +670,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Broker", @@ -697,7 +697,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "User", @@ -724,7 +724,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Client ID", @@ -773,7 +773,7 @@ ] }, "timezone": "browser", - "title": "Kafka Quotas - v2", + "title": "Kafka Quotas", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/kafka-topics.json b/cfk-prometheus-grafana/grafana/kafka-topics.json index 20e0198c..edbbb45b 100644 --- a/cfk-prometheus-grafana/grafana/kafka-topics.json +++ b/cfk-prometheus-grafana/grafana/kafka-topics.json @@ -62,7 +62,7 @@ }, { "cacheTimeout": null, - 
"datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -142,7 +142,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{namespace=\"$env\",topic=~\"$topic\"}[5m])))", "format": "time_series", "hide": false, "instant": false, @@ -164,7 +164,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -266,7 +266,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -346,7 +346,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{namespace=\"$env\",topic=~\"$topic\"}[5m])))", "format": "time_series", "hide": false, "instant": false, @@ -368,7 +368,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -448,7 +448,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",namespace=\"$env\"}[5m])))", + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{namespace=\"$env\",topic=~\"$topic\"}[5m])))", 
"format": "time_series", "hide": false, "instant": false, @@ -470,7 +470,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -550,7 +550,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ namespace=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", "format": "time_series", "hide": false, "instant": false, @@ -572,7 +572,7 @@ }, { "cacheTimeout": null, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -652,7 +652,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ namespace=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", "format": "time_series", "hide": false, "instant": false, @@ -708,7 +708,7 @@ "mode": "thresholds" }, "columns": [], - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "description": null, "editable": true, "error": false, @@ -729,7 +729,7 @@ "fontSize": "100%", "gridPos": { "h": 10, - "w": 12, + "w": 24, "x": 0, "y": 3 }, @@ -762,126 +762,7 @@ "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Start Offsets", - "transformations": [ - { - "id": "organize", - "options": { - "excludeByName": { - "Time": true, - "__name__": true, - "app": true, - "clusterId": true, - "confluentPlatform": true, - "confluent_platform": true, - "controller_revision_hash": true, - "instance": true, - "job": true, - "namespace": true, - "platform_confluent_io_type": true, - 
"statefulset_kubernetes_io_pod_name": true, - "type": true - }, - "indexByName": { - "Value": 4, - "partition": 3, - "pod": 1, - "topic": 2 - }, - "renameByName": { - "Value": "offset" - } - } }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "partition" - } - ], - "fields": {} - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "topic" - } - ] - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "partition" - } - ] - } - } - ], - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": true - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } - }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ { "datasource": null, "expr": "kafka_log_log_logendoffset{namespace=\"$env\",topic=~\"$topic\"}", @@ -899,47 +780,27 @@ ], "timeFrom": null, "timeShift": null, - "title": "End Offsets", + "title": "Offsets", "transformations": [ { - "id": "organize", + "id": "concatenate", "options": { - "excludeByName": { - "Time": true, - "__name__": true, - "app": true, - "clusterId": true, - "confluentPlatform": true, - "confluent_platform": true, - "controller_revision_hash": true, - "instance": true, - "job": true, - "namespace": true, - "platform_confluent_io_type": true, - "statefulset_kubernetes_io_pod_name": true, - 
"type": true - }, - "indexByName": { - "Value": 4, - "partition": 3, - "pod": 1, - "topic": 2 - }, - "renameByName": { - "Value": "offset" - } + "frameNameLabel": "id", + "frameNameMode": "label" } }, { - "id": "convertFieldType", + "id": "filterFieldsByName", "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "partition" - } - ], - "fields": {} + "include": { + "names": [ + "pod 1", + "topic 1", + "Value #A", + "Value #B", + "partition 1" + ] + } } }, { @@ -948,7 +809,7 @@ "fields": {}, "sort": [ { - "field": "topic" + "field": "partition 1" } ] } @@ -959,10 +820,32 @@ "fields": {}, "sort": [ { - "field": "partition" + "field": "topic 1" } ] } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value #A": 3, + "Value #B": 4, + "hostname 1": 0, + "id": 5, + "partition 1": 2, + "topic 1": 1 + }, + "renameByName": { + "Value #A": "start offset", + "Value #B": "end offset", + "hostname 1": "broker", + "partition 1": "", + "topic": "", + "topic 1": "" + } + } } ], "transparent": false, @@ -1003,7 +886,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": false, "label": "Environment", @@ -1030,7 +913,7 @@ "text": null, "value": null }, - "datasource": "Prometheus", + "datasource": "${DS_PROMETHEUS}", "hide": 0, "includeAll": true, "label": "Topic", @@ -1079,7 +962,7 @@ ] }, "timezone": "browser", - "title": "Kafka topics - v2", + "title": "Kafka topics", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json index 0e2eb79e..ff7a2334 100644 --- a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json +++ b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json @@ -502,7 +502,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "A metric with constant value 1 indicating the server is up and emitting 
metrics.", "editable": true, "error": false, "fieldConfig": { @@ -602,7 +602,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "The number of messages consumed per second across all queries.", "editable": true, "error": false, "fieldConfig": { @@ -702,7 +702,7 @@ { "cacheTimeout": null, "datasource": "${DS_PROMETHEUS}", - "description": null, + "description": "The number of messages produced per second across all queries.", "editable": true, "error": false, "fieldConfig": { @@ -1256,98 +1256,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - 
"displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", @@ -1356,7 +1270,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (max.)", "metric": "", "refId": "", "step": 10, @@ -1365,7 +1279,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Poll Latency (Max.)", + "title": "Poll Latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -1419,12 +1333,12 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 4 + "x": 8, + "y": 3 }, "height": null, "hideTimeOverride": false, - "id": 17, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, @@ -1456,98 +1370,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Process Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - 
}, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_streams_stream_thread_metrics_process_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", @@ -1556,7 +1384,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (max.)", "metric": "", "refId": "", "step": 10, @@ -1565,7 +1393,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Process Latency (Max.)", + "title": "Process Latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -1620,11 +1448,11 @@ "h": 10, "w": 8, "x": 0, - "y": 5 + "y": 4 }, "height": null, "hideTimeOverride": false, - "id": 19, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, @@ -1656,98 +1484,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Commit Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - 
"barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", @@ -1756,7 +1498,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (max.)", "metric": "", "refId": "", "step": 10, @@ -1765,7 +1507,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Commit Latency (Max.)", + "title": "Commit Latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -1819,12 +1561,12 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 6 + "x": 8, + "y": 4 }, "height": null, "hideTimeOverride": false, - "id": 21, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, @@ -1856,98 +1598,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - 
], - "timeFrom": null, - "timeShift": null, - "title": "Punctuate Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "${DS_PROMETHEUS}", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{namespace=\"$env\",app=\"$ksqldb_cluster\",pod=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", @@ -1956,7 +1612,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (max.)", "metric": "", "refId": "", "step": 10, @@ -1965,7 +1621,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Punctuate Latency (Max.)", + "title": "Punctuate Latency", "transformations": [], "transparent": false, "type": 
"timeseries" @@ -2001,11 +1657,11 @@ "h": 1, "w": 24, "x": 0, - "y": 7 + "y": 5 }, "height": null, "hideTimeOverride": false, - "id": 23, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, @@ -2062,11 +1718,11 @@ "h": 10, "w": 8, "x": 0, - "y": 7 + "y": 5 }, "height": null, "hideTimeOverride": false, - "id": 24, + "id": 20, "interval": null, "links": [], "maxDataPoints": 100, @@ -2162,11 +1818,11 @@ "h": 10, "w": 8, "x": 8, - "y": 7 + "y": 5 }, "height": null, "hideTimeOverride": false, - "id": 25, + "id": 21, "interval": null, "links": [], "maxDataPoints": 100, @@ -2262,11 +1918,11 @@ "h": 10, "w": 8, "x": 16, - "y": 7 + "y": 5 }, "height": null, "hideTimeOverride": false, - "id": 26, + "id": 22, "interval": null, "links": [], "maxDataPoints": 100, @@ -2362,11 +2018,11 @@ "h": 10, "w": 8, "x": 0, - "y": 8 + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 27, + "id": 23, "interval": null, "links": [], "maxDataPoints": 100, @@ -2462,11 +2118,11 @@ "h": 10, "w": 8, "x": 8, - "y": 8 + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 28, + "id": 24, "interval": null, "links": [], "maxDataPoints": 100, @@ -2562,11 +2218,11 @@ "h": 10, "w": 8, "x": 16, - "y": 8 + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 29, + "id": 25, "interval": null, "links": [], "maxDataPoints": 100, @@ -2662,11 +2318,11 @@ "h": 10, "w": 8, "x": 0, - "y": 9 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 30, + "id": 26, "interval": null, "links": [], "maxDataPoints": 100, @@ -2762,11 +2418,11 @@ "h": 10, "w": 8, "x": 8, - "y": 9 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 31, + "id": 27, "interval": null, "links": [], "maxDataPoints": 100, @@ -2862,11 +2518,11 @@ "h": 10, "w": 8, "x": 16, - "y": 9 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 32, + "id": 28, "interval": null, "links": [], "maxDataPoints": 100, @@ -2962,11 +2618,11 @@ "h": 10, "w": 8, "x": 0, - "y": 10 + "y": 8 }, "height": 
null, "hideTimeOverride": false, - "id": 33, + "id": 29, "interval": null, "links": [], "maxDataPoints": 100, @@ -3062,11 +2718,11 @@ "h": 10, "w": 8, "x": 8, - "y": 10 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 34, + "id": 30, "interval": null, "links": [], "maxDataPoints": 100, @@ -3162,11 +2818,11 @@ "h": 10, "w": 8, "x": 16, - "y": 10 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 35, + "id": 31, "interval": null, "links": [], "maxDataPoints": 100, @@ -3262,11 +2918,11 @@ "h": 10, "w": 8, "x": 0, - "y": 11 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 36, + "id": 32, "interval": null, "links": [], "maxDataPoints": 100, @@ -3362,11 +3018,11 @@ "h": 10, "w": 8, "x": 8, - "y": 11 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 37, + "id": 33, "interval": null, "links": [], "maxDataPoints": 100, @@ -3462,11 +3118,11 @@ "h": 10, "w": 8, "x": 16, - "y": 11 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 38, + "id": 34, "interval": null, "links": [], "maxDataPoints": 100, @@ -3677,7 +3333,7 @@ ] }, "timezone": "browser", - "title": "ksqlDB cluster - v2", + "title": "ksqlDB cluster", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json index 1027687a..3b0dd952 100644 --- a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json +++ b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json @@ -931,7 +931,7 @@ ] }, "timezone": "browser", - "title": "Schema Registry cluster - v2", + "title": "Schema Registry cluster", "uid": null, "version": 0 } diff --git a/cfk-prometheus-grafana/grafana/zookeeper-cluster.json b/cfk-prometheus-grafana/grafana/zookeeper-cluster.json index 3fa22d69..156bea72 100644 --- a/cfk-prometheus-grafana/grafana/zookeeper-cluster.json +++ b/cfk-prometheus-grafana/grafana/zookeeper-cluster.json @@ -1908,7 +1908,7 @@ ] }, "timezone": "browser", - "title": 
"Zookeeper cluster - v2", + "title": "Zookeeper cluster", "uid": null, "version": 0 } diff --git a/grafana-dashboards/confluent-platform.py b/grafana-dashboards/confluent-platform.py index af5aa005..ab6ff90e 100644 --- a/grafana-dashboards/confluent-platform.py +++ b/grafana-dashboards/confluent-platform.py @@ -666,7 +666,7 @@ def dashboard( # build dashboard return G.Dashboard( - title="Confluent Platform overview - v2", + title="Confluent Platform overview", description="Overview of the main health-check metrics from Confluent Platform components.", tags=[ "confluent", diff --git a/grafana-dashboards/kafka-cluster.py b/grafana-dashboards/kafka-cluster.py index f05ee74e..53ccd5d3 100644 --- a/grafana-dashboards/kafka-cluster.py +++ b/grafana-dashboards/kafka-cluster.py @@ -1282,7 +1282,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): # build dashboard return G.Dashboard( - title="Kafka cluster - v2", + title="Kafka cluster", description="Overview of the Kafka cluster", tags=["confluent", "kafka"], inputs=[ diff --git a/grafana-dashboards/kafka-connect-cluster.py b/grafana-dashboards/kafka-connect-cluster.py index 4378469e..11fd70cf 100644 --- a/grafana-dashboards/kafka-connect-cluster.py +++ b/grafana-dashboards/kafka-connect-cluster.py @@ -1066,7 +1066,7 @@ def dashboard( # build dashboard return G.Dashboard( - title="Kafka Connect cluster - v2", + title="Kafka Connect cluster", description="Overview of the Kafka Connect cluster", tags=["confluent", "kafka-connect"], inputs=[ diff --git a/grafana-dashboards/kafka-consumer.py b/grafana-dashboards/kafka-consumer.py index 4697547c..eb3cf89a 100644 --- a/grafana-dashboards/kafka-consumer.py +++ b/grafana-dashboards/kafka-consumer.py @@ -2,26 +2,59 @@ import grafanalib.core as G -def dashboard(env_label="namespace", server_label="pod"): +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka Consumer dashboard + It includes: + - Clients overview + - 
Performance + - Consumer Group + - Connections + - Per Broker + - Per Topic + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + + # Default sizes default_height = 5 stat_width = 4 ts_width = 8 + topk = "10" + + # Queries + by_env = env_label + '="$env"' + by_consumer = by_env + ', client_type="consumer"' + by_server = by_consumer + "," + server_label + '=~"$server"' + by_client = by_server + ', client_id=~"$client_id"' + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="server", label="Server", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{" - + env_label - + '="$env", client_type="consumer"},' + + by_consumer + + "}," + server_label + ")", multi=True, @@ -30,18 +63,18 @@ def dashboard(env_label="namespace", server_label="pod"): G.Template( name="client_id", label="Client ID", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_consumer_consumer_metrics_incoming_byte_rate{" - + env_label - + '="$env", client_type="consumer"},client_id)', + + by_consumer + + "},client_id)", multi=True, includeAll=True, ), ] ) - topk = "10" - + # Panel groups + ## Clients overview: overview_base = 0 overview_panels = [ G.RowPanel( @@ -50,16 +83,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Record Consumed Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{" - + env_label - + '="$env", 
client_type="producer", client_id=~"$client_id", ' - + server_label - + '=~"$server"} > 0)', + + by_client + + "} > 0)", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -71,16 +102,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Records Lag", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{" - + env_label - + '="$env", client_type="producer", client_id=~"$client_id", ' - + server_label - + '=~"$server"} > 0)', + + by_client + + "} > 0)", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -92,16 +121,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Rebalance Rate per hour", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{" - + env_label - + '="$env", client_type="producer", client_id=~"$client_id", ' - + server_label - + '=~"$server"} > 0)', + + by_client + + "} > 0)", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -115,16 +142,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Failed Rebalance Rate per hour", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{" - + env_label - + '="$env", client_type="producer", client_id=~"$client_id", ' - + server_label - + '=~"$server"} > 0)', + + by_client + + "} > 0)", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -137,14 +162,12 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Versions", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="count(kafka_consumer_app_info{" - + env_label - + '="$env", client_id=~"$client_id", version!="", ' - + server_label - + '=~"$server"}) by (version)', + + 
by_client + + ', version!=""}) by (version)', legendFormat="{{version}}", ), ], @@ -156,20 +179,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Performance: performance_base = overview_base + 1 performance_inner = [ G.TimeSeries( title="Bytes Consumed Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -182,16 +204,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records Consumed Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -204,16 +224,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records Lag Max", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -225,16 +243,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -246,26 +262,22 @@ def 
dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Latency", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -277,26 +289,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Size", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -308,26 +316,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Throttle Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + 
",kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -347,20 +351,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Consumer Group: group_base = performance_base + 3 group_inner = [ G.TimeSeries( title="Commit Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_commit_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -372,16 +375,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Join Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_join_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -393,16 +394,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sync Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_sync_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -414,26 +413,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Commit Latency", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + 
'=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_commit_latency_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -446,26 +441,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Join Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_join_time_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_join_time_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -478,26 +469,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Sync Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_sync_time_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_sync_time_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -510,16 +497,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Heartbeat Rate", - dataSource="Prometheus", + dataSource=ds, 
targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -531,16 +516,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Heartbeat Response Time (Max.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -553,16 +536,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Last Heartbeat Seconds Ago", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -575,26 +556,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Rebalance Rate Per Hour", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (failed)", ), ], @@ -606,26 
+583,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Rebalance Latency", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -637,16 +610,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Assigned Partitions", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_coordinator_metrics_assigned_partitions{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -666,20 +637,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Connections: connection_base = group_base + 4 connection_inner = [ G.TimeSeries( title="Connection Count", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_metrics_connection_count{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -691,16 +661,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Connection Creation Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_metrics_connection_creation_rate{" - + env_label - 
+ '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -713,16 +681,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Connection Close Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_metrics_connection_close_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -735,60 +701,52 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO ratio", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_metrics_io_ratio{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - # unit="percentunit", gridPos=G.GridPos( h=default_height * 2, w=ts_width, x=ts_width * 0, y=connection_base + 1 ), ), G.TimeSeries( title="IO wait ratio", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_metrics_io_wait_ratio{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - # unit="percentunit", gridPos=G.GridPos( h=default_height * 2, w=ts_width, x=ts_width * 1, y=connection_base + 1 ), ), G.TimeSeries( title="Select Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_metrics_select_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + 
'=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -801,16 +759,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO time avg.", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_metrics_io_time_ns_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -823,16 +779,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO wait time avg.", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_metrics_io_wait_time_ns_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -853,20 +807,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Per Broker: per_broker_base = connection_base + 3 per_broker_inner = [ G.TimeSeries( title="Incoming Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_node_metrics_incoming_byte_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{node_id}}", @@ -881,16 +834,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Outgoing Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_node_metrics_outgoing_byte_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{node_id}}", @@ -905,16 +856,14 @@ def 
dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Latency", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_node_metrics_request_latency_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{node_id}} (avg.)", @@ -923,10 +872,8 @@ def dashboard(env_label="namespace", server_label="pod"): expr="topk(" + topk + ",kafka_consumer_consumer_node_metrics_request_latency_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{node_id}} (max.)", @@ -941,16 +888,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_node_metrics_request_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{node_id}}", @@ -965,16 +910,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Response Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_node_metrics_response_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{node_id}}", @@ -997,20 +940,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Per Topic: per_topic_base = per_broker_base + 2 per_topic_inner = [ G.TimeSeries( title="Bytes Consumed Rate per Topic", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + 
",kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", ), ], @@ -1023,16 +965,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records Consumed Rate per Topic", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", ), ], @@ -1045,16 +985,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Size per Topic", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}} (avg.)", @@ -1063,10 +1001,8 @@ def dashboard(env_label="namespace", server_label="pod"): expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}} (max.)", @@ -1081,16 +1017,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records per Request Avg. 
per Topic", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{topic}}", ), ], @@ -1110,6 +1044,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + # group all panels panels = ( overview_panels + performance_panels @@ -1119,8 +1054,9 @@ def dashboard(env_label="namespace", server_label="pod"): + per_topic_panels ) + # build dashboard return G.Dashboard( - title="Kafka Consumer - v2", + title="Kafka Consumer", description="Overview of the Kafka consumers", tags=["confluent", "kafka-client", "kafka-consumer"], inputs=[ @@ -1138,6 +1074,10 @@ def dashboard(env_label="namespace", server_label="pod"): ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") -dashboard = dashboard(env_label, server_label) + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/kafka-producer.py b/grafana-dashboards/kafka-producer.py index 261be8ec..f8864041 100644 --- a/grafana-dashboards/kafka-producer.py +++ b/grafana-dashboards/kafka-producer.py @@ -2,26 +2,57 @@ import grafanalib.core as G -def dashboard(env_label="namespace", server_label="pod"): +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka Producer dashboard + It includes: + - Clients overview + - Performance + - Connections + - Per Broker + - Per Topic + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. 
+ Panels are grouped in Row to load only needed panels and load others on demand. + + Invariants: + - Max width: 24 + """ + # Default sizes default_height = 5 stat_width = 4 ts_width = 8 + topk = "10" + # Queries + by_env = env_label + '="$env"' + by_producer = by_env + ', client_type="producer"' + by_server = by_producer + "," + server_label + '=~"$server"' + by_client = by_server + ', client_id=~"$client_id"' + + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="server", label="Server", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_producer_producer_metrics_record_retry_rate{" - + env_label - + '="$env", client_type="producer"},' + + by_producer + + "}," + server_label + ")", multi=True, @@ -30,18 +61,18 @@ def dashboard(env_label="namespace", server_label="pod"): G.Template( name="client_id", label="Client ID", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_producer_producer_metrics_record_retry_rate{" - + env_label - + '="$env", client_type="producer"},client_id)', + + by_producer + + "},client_id)", multi=True, includeAll=True, ), ] ) - topk = "10" - + # Panel groups + ## Clients overview: overview_panels = [ G.RowPanel( title="Overview", @@ -49,16 +80,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Record Send Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", kafka_producer_producer_metrics_record_send_rate{" - + env_label - + '="$env", client_type="producer", client_id=~"$client_id", ' - + server_label - + '=~"$server"} > 0)', + + by_client + + "} > 0)", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -70,16 +99,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Error Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ 
G.Target( expr="topk(" + topk + ", kafka_producer_producer_metrics_record_error_rate{" - + env_label - + '="$env", client_type="producer", client_id=~"$client_id", ' - + server_label - + '=~"$server"} > 0)', + + by_client + + "} > 0)", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -92,16 +119,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Retry Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", kafka_producer_producer_metrics_record_retry_rate{" - + env_label - + '="$env", client_type="producer", client_id=~"$client_id", ' - + server_label - + '=~"$server"} > 0)', + + by_client + + "} > 0)", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -115,14 +140,12 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.Stat( title="Versions", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="count(kafka_producer_app_info{" - + env_label - + '="$env", client_id=~"$client_id", version!="", ' - + server_label - + '=~"$server"}) by (version)', + + by_client + + ',version!=""}) by (version)', legendFormat="{{version}}", ), ], @@ -134,20 +157,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Performance: performance_base = 1 performance_inner = [ G.TimeSeries( title="Incoming Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_incoming_byte_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -160,16 +182,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Outgoing Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_outgoing_byte_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - 
+ server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -182,16 +202,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Metadata Age", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_metadata_age{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -204,16 +222,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_request_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -226,16 +242,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request in-flight", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_requests_in_flight{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -248,16 +262,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Records per Request (avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_records_per_request_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -269,16 +281,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Send 
Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_record_send_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -291,16 +301,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Retry Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_record_retry_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -313,16 +321,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Error Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_record_error_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -335,26 +341,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Size", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_record_size_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_record_size_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -367,26 +369,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), 
G.TimeSeries( title="Record Queue Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_record_queue_time_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_record_queue_time_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -399,26 +397,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Produce Throttle Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_produce_throttle_time_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_produce_throttle_time_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -431,26 +425,22 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Batch Size", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_batch_size_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (avg.)", ), G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_batch_size_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client 
+ + "})", legendFormat="{{client_id}}@{{" + server_label + "}} (max.)", ), ], @@ -463,16 +453,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Batch Split Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_batch_split_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -485,16 +473,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Compression Rate (avg.)", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_compression_rate_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -515,20 +501,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Connections: connection_base = performance_base + 5 connection_inner = [ G.TimeSeries( title="Connection Count", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_connection_count{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -540,16 +525,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Connection Creation Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_connection_creation_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -562,16 +545,14 @@ def dashboard(env_label="namespace", 
server_label="pod"): ), G.TimeSeries( title="Connection Close Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_connection_close_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -584,16 +565,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO ratio", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_io_ratio{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -606,16 +585,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO wait ratio", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_io_wait_ratio{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -628,16 +605,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Select Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_select_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -650,16 +625,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO time avg.", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_io_time_ns_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + 
server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -672,16 +645,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="IO wait time avg.", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_metrics_io_wait_time_ns_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}}", ), ], @@ -702,20 +673,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Per Broker: per_broker_base = connection_base + 2 per_broker_inner = [ G.TimeSeries( title="Incoming Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_node_metrics_incoming_byte_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{node_id}}", @@ -730,16 +700,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Outgoing Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_node_metrics_outgoing_byte_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{node_id}}", @@ -754,16 +722,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Latency", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_node_metrics_request_latency_avg{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> 
{{node_id}} (avg.)", @@ -772,10 +738,8 @@ def dashboard(env_label="namespace", server_label="pod"): expr="topk(" + topk + ",kafka_producer_producer_node_metrics_request_latency_max{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{node_id}} (max.)", @@ -790,16 +754,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_node_metrics_request_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{node_id}}", @@ -814,16 +776,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Response Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_node_metrics_response_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} <- {{node_id}}", @@ -846,20 +806,19 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + ## Per Topic: per_topic_base = per_broker_base + 2 per_topic_inner = [ G.TimeSeries( title="Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_topic_metrics_byte_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", ), ], @@ -872,16 +831,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Compression Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + 
",kafka_producer_producer_topic_metrics_compression_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", ), ], @@ -894,16 +851,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Send Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_topic_metrics_record_send_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", ), ], @@ -916,16 +871,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Retry Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_topic_metrics_record_retry_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", ), ], @@ -938,16 +891,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Record Error Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_producer_producer_topic_metrics_record_error_rate{" - + env_label - + '="$env",client_id=~"$client_id", ' - + server_label - + '=~"$server"})', + + by_client + + "})", legendFormat="{{client_id}}@{{" + server_label + "}} -> {{topic}}", ), ], @@ -968,6 +919,7 @@ def dashboard(env_label="namespace", server_label="pod"): ), ] + # group all panels panels = ( overview_panels + performance_panels @@ -976,8 +928,9 @@ def dashboard(env_label="namespace", server_label="pod"): + per_topic_panels ) + # build dashboard return G.Dashboard( - title="Kafka Producer - v2", + title="Kafka Producer", description="Overview of 
the Kafka producers", tags=["confluent", "kafka-client", "kafka-producer"], inputs=[ @@ -995,6 +948,10 @@ def dashboard(env_label="namespace", server_label="pod"): ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") -dashboard = dashboard(env_label, server_label) + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/kafka-quotas.py b/grafana-dashboards/kafka-quotas.py index 654ecef0..72b870e6 100644 --- a/grafana-dashboards/kafka-quotas.py +++ b/grafana-dashboards/kafka-quotas.py @@ -2,25 +2,57 @@ import grafanalib.core as G -def dashboard(env_label="namespace", server_label="pod"): +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka Quotas dashboard + It includes: + - Quotas overview + - Throttling + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. 
+ + Invariants: + - Max width: 24 + """ + + # Default sizes default_height = 6 ts_width = 8 + topk = "10" + + # Queries + by_env = env_label + '="$env"' + by_client = ( + by_env + + ',user=~"$user",client_id=~"$client_id",' + + server_label + + '=~"$broker"' + ) + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="broker", label="Broker", - dataSource="Prometheus", + dataSource=ds, query="label_values(kafka_server_produce_byte_rate{" - + env_label - + '="$env"},' + + by_env + + "}," + server_label + ")", multi=True, @@ -29,7 +61,7 @@ def dashboard(env_label="namespace", server_label="pod"): G.Template( name="user", label="User", - dataSource="Prometheus", + dataSource=ds, query="label_values(user)", multi=True, includeAll=True, @@ -37,7 +69,7 @@ def dashboard(env_label="namespace", server_label="pod"): G.Template( name="client_id", label="Client ID", - dataSource="Prometheus", + dataSource=ds, query="label_values(client_id)", multi=True, includeAll=True, @@ -45,21 +77,18 @@ def dashboard(env_label="namespace", server_label="pod"): ] ) - topk = "10" - + # Panels: panels = [ G.TimeSeries( title="Produce Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_server_produce_byte_rate{" - + env_label - + '="$env",user=~"$user",client_id=~"$client_id", ' - + server_label - + '=~"$broker"})', + + by_client + + "})", legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + server_label + "}}", @@ -72,16 +101,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Byte Rate", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_server_fetch_byte_rate{" - + env_label - + '="$env",user=~"$user",client_id=~"$client_id", ' - + server_label - + '=~"$broker"})', + + by_client + + 
"})", legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + server_label + "}}", @@ -94,16 +121,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_server_request_request_time{" - + env_label - + '="$env",user=~"$user",client_id=~"$client_id", ' - + server_label - + '=~"$broker"})', + + by_client + + "})", legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + server_label + "}}", @@ -116,16 +141,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Produce Throttle Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_server_produce_throttle_time{" - + env_label - + '="$env",user=~"$user",client_id=~"$client_id", ' - + server_label - + '=~"$broker"} > 0)', + + by_client + + "} > 0)", legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + server_label + "}}", @@ -138,16 +161,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Fetch Throttle Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_server_fetch_throttle_time{" - + env_label - + '="$env",user=~"$user",client_id=~"$client_id", ' - + server_label - + '=~"$broker"} > 0)', + + by_client + + "} > 0)", legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + server_label + "}}", @@ -160,16 +181,14 @@ def dashboard(env_label="namespace", server_label="pod"): ), G.TimeSeries( title="Request Throttle Time", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ",kafka_server_request_throttle_time{" - + env_label - + '="$env",user=~"$user",client_id=~"$client_id", ' - + server_label - + '=~"$broker"} > 0)', + + by_client + + "} > 0)", legendFormat="User:{{user}} | Client ID:{{client_id}} @ Broker:{{" + server_label + "}}", @@ 
-183,7 +202,7 @@ def dashboard(env_label="namespace", server_label="pod"): ] return G.Dashboard( - title="Kafka Quotas - v2", + title="Kafka Quotas", description="Overview of the Kafka quotass", tags=["confluent", "kafka-client", "kafka-quota"], inputs=[ @@ -201,6 +220,10 @@ def dashboard(env_label="namespace", server_label="pod"): ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") server_label = os.environ.get("SERVER_LABEL", "hostname") -dashboard = dashboard(env_label, server_label) + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/kafka-topics.py b/grafana-dashboards/kafka-topics.py index 021998a0..f3e0a907 100644 --- a/grafana-dashboards/kafka-topics.py +++ b/grafana-dashboards/kafka-topics.py @@ -2,34 +2,59 @@ import grafanalib.core as G -def dashboard(env_label="namespace"): +def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): + """ + Kafka Topics dashboard + It includes: + - Throughput + - Offsets + + Structure: + - Default sizes + - Queries + - Templating (variables) + - Panel groups + - Dashboard definition + + Dashboard is defined by a name, it includes the variables to template panels, and then adds the panels. + Panels are grouped in Row to load only needed panels and load others on demand. 
+ + Invariants: + - Max width: 24 + """ + + # Default sizes default_height = 10 ts_width = 12 table_width = 12 + topk = "10" + + # Queries + by_env = env_label + '="$env"' + by_topic = by_env + ',topic=~"$topic"' + # Templating (variables) templating = G.Templating( list=[ G.Template( name="env", label="Environment", - dataSource="Prometheus", + dataSource=ds, query="label_values(" + env_label + ")", ), G.Template( name="topic", label="Topic", - dataSource="Prometheus", - query="label_values(kafka_log_log_size{" - + env_label - + '="$env"}, topic)', + dataSource=ds, + query="label_values(kafka_log_log_size{" + by_env + "}, topic)", multi=True, includeAll=True, ), ] ) - topk = "10" - + # Panel groups: + ## Throughtput: throughput_base = 0 throughput_layers = 3 throughput_panels = [ @@ -39,14 +64,15 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Messages In/Sec", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk - + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~"$topic",' - + env_label - + '="$env"}[5m])))', + + ", sum without(instance,pod,statefulset_kubernetes_io_pod_name) " + + "(rate(kafka_server_brokertopicmetrics_messagesinpersec{" + + by_topic + + "}[5m])))", legendFormat="{{topic}}", ), ], @@ -60,14 +86,14 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Log size", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", sum(kafka_log_log_size{" - + env_label - + '="$env",topic=~"$topic"}) by (topic))', + + by_topic + + "}) by (topic))", legendFormat="{{topic}}", ), ], @@ -81,14 +107,14 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Bytes In/Sec", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk - + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~"$topic",' 
- + env_label - + '="$env"}[5m])))', + + ", sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{" + + by_topic + + "}[5m])))", legendFormat="{{topic}}", ), ], @@ -102,14 +128,15 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Bytes Out/Sec", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk - + ', sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~"$topic",' - + env_label - + '="$env"}[5m])))', + + ", sum without(instance,pod,statefulset_kubernetes_io_pod_name) " + + "(rate(kafka_server_brokertopicmetrics_bytesoutpersec{" + + by_topic + + "}[5m])))", legendFormat="{{topic}}", ), ], @@ -123,14 +150,14 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Produce Requests/Sec", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ " - + env_label - + '="$env", topic=~"$topic"}[5m])) by (topic))', + + by_topic + + "}[5m])) by (topic))", legendFormat="{{topic}}", ), ], @@ -144,14 +171,14 @@ def dashboard(env_label="namespace"): ), G.TimeSeries( title="Consumer Fetch Requests/Sec", - dataSource="Prometheus", + dataSource=ds, targets=[ G.Target( expr="topk(" + topk + ", sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ " - + env_label - + '="$env", topic=~"$topic"}[5m])) by (topic))', + + by_topic + + "}[5m])) by (topic))", legendFormat="{{topic}}", ), ], @@ -167,77 +194,87 @@ def dashboard(env_label="namespace"): offsets_txs = [ { - "id": "organize", - "options": { - "excludeByName": { - "Time": True, - "__name__": True, - "app": True, - "confluent_platform": True, - "controller_revision_hash": True, - "job": True, - "clusterId": True, - "confluentPlatform": True, - "instance": True, - "namespace": True, - "platform_confluent_io_type": True, - 
"statefulset_kubernetes_io_pod_name": True, - "type": True, - }, - "indexByName": { - "pod": 1, - "topic": 2, - "partition": 3, - "Value": 4, - }, - "renameByName": {"Value": "offset"}, - }, + "id": "concatenate", + "options": { + "frameNameLabel": "id", + "frameNameMode": "label" + } }, { - "id": "convertFieldType", - "options": { - "conversions": [ - {"destinationType": "number", "targetField": "partition"} - ], - "fields": {}, - }, + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + server_label+" 1", + "topic 1", + "Value #A", + "Value #B", + "partition 1" + ] + } + } }, - {"id": "sortBy", "options": {"fields": {}, "sort": [{"field": "topic"}]}}, { - "id": "sortBy", - "options": {"fields": {}, "sort": [{"field": "partition"}]}, + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "partition 1" + } + ] + } }, + { + "id": "sortBy", + "options": { + "fields": {}, + "sort": [ + { + "field": "topic 1" + } + ] + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value #A": 3, + "Value #B": 4, + "hostname 1": 0, + "id": 5, + "partition 1": 2, + "topic 1": 1 + }, + "renameByName": { + "Value #A": "start offset", + "Value #B": "end offset", + "hostname 1": "broker", + "partition 1": "", + "topic": "", + "topic 1": "" + } + } + } ] + ## Offsets offsets_base = throughput_base + throughput_layers offsets_inner = [ G.Table( - title="Start Offsets", - dataSource="Prometheus", + title="Offsets", + dataSource=ds, targets=[ G.Target( - expr="kafka_log_log_logstartoffset{" - + env_label - + '="$env",topic=~"$topic"}', + expr="kafka_log_log_logstartoffset{" + by_topic + "}", legendFormat="{{topic}}", format="table", instant=True, ), - ], - filterable=True, - transformations=offsets_txs, - gridPos=G.GridPos( - h=default_height, w=table_width, x=table_width * 0, y=offsets_base - ), - ), - G.Table( - title="End Offsets", - dataSource="Prometheus", - targets=[ G.Target( - 
expr="kafka_log_log_logendoffset{" - + env_label - + '="$env",topic=~"$topic"}', + expr="kafka_log_log_logendoffset{" + by_topic + "}", legendFormat="{{topic}}", format="table", instant=True, @@ -246,7 +283,7 @@ def dashboard(env_label="namespace"): filterable=True, transformations=offsets_txs, gridPos=G.GridPos( - h=default_height, w=table_width, x=table_width * 1, y=offsets_base + h=default_height, w=table_width * 2, x=table_width * 0, y=offsets_base ), ), ] @@ -259,9 +296,12 @@ def dashboard(env_label="namespace"): ), ] + # group all panels panels = throughput_panels + offsets_panels + + # build dashboard return G.Dashboard( - title="Kafka topics - v2", + title="Kafka topics", description="Overview of the Kafka topics", tags=["confluent", "kafka"], inputs=[ @@ -279,5 +319,10 @@ def dashboard(env_label="namespace"): ).auto_panel_ids() +# main labels to customize dashboard +ds = os.environ.get("DATASOURCE", "Prometheus") env_label = os.environ.get("ENV_LABEL", "env") -dashboard = dashboard(env_label) +server_label = os.environ.get("SERVER_LABEL", "hostname") + +# dashboard required by grafanalib +dashboard = dashboard(ds, env_label, server_label) diff --git a/grafana-dashboards/ksqldb-cluster.py b/grafana-dashboards/ksqldb-cluster.py index 3caeab89..d2e5adb8 100644 --- a/grafana-dashboards/ksqldb-cluster.py +++ b/grafana-dashboards/ksqldb-cluster.py @@ -93,7 +93,6 @@ def dashboard( title="Overview", gridPos=G.GridPos(h=1, w=24, x=0, y=overview_base), ), - # First layer G.Stat( title="ksqlDB: Online Servers", @@ -203,7 +202,6 @@ def dashboard( h=default_height, w=stat_width, x=stat_width * 4, y=overview_base ), ), - # Second layer G.TimeSeries( title="Cluster Liveness", @@ -219,7 +217,9 @@ def dashboard( ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], - gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=0, y=overview_base + 1), + gridPos=G.GridPos( + h=default_height * 2, w=ts_width, x=0, y=overview_base + 1 + ), ), G.TimeSeries( 
title="Messages consumed/sec", @@ -330,147 +330,99 @@ def dashboard( queries_base = system_base + 1 queries_inner = [ G.TimeSeries( - title="Poll Latency (Avg.)", + title="Poll Latency", dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_poll_latency_avg{" + by_thread + "}", - legendFormat="{{thread_id}}", + legendFormat="{{thread_id}} (avg.)", ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base - ), - ), - G.TimeSeries( - title="Poll Latency (Max.)", - dataSource=ds, - targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_poll_latency_max{" + by_thread + "}", - legendFormat="{{thread_id}}", + legendFormat="{{thread_id}} (max.)", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base ), ), G.TimeSeries( - title="Process Latency (Avg.)", + title="Process Latency", dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_process_latency_avg{" + by_thread + "}", - legendFormat="{{thread_id}}", + legendFormat="{{thread_id}} (avg.)", ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + 1 - ), - ), - G.TimeSeries( - title="Process Latency (Max.)", - dataSource=ds, - targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_process_latency_max{" + by_thread + "}", - legendFormat="{{thread_id}}", + legendFormat="{{thread_id}} (max.)", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + 1 + h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base ), ), G.TimeSeries( - 
title="Commit Latency (Avg.)", + title="Commit Latency", dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_commit_latency_avg{" + by_thread + "}", - legendFormat="{{thread_id}}", + legendFormat="{{thread_id}} (avg.)", ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + 2 - ), - ), - G.TimeSeries( - title="Commit Latency (Max.)", - dataSource=ds, - targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_commit_latency_max{" + by_thread + "}", - legendFormat="{{thread_id}}", + legendFormat="{{thread_id}} (max.)", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + 2 + h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + 1 ), ), G.TimeSeries( - title="Punctuate Latency (Avg.)", + title="Punctuate Latency", dataSource=ds, targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_punctuate_latency_avg{" + by_thread + "}", - legendFormat="{{thread_id}}", + legendFormat="{{thread_id}} (avg.)", ), - ], - legendDisplayMode="table", - legendCalcs=["max", "mean", "last"], - unit="ms", - gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=queries_base + 3 - ), - ), - G.TimeSeries( - title="Punctuate Latency (Max.)", - dataSource=ds, - targets=[ G.Target( expr="kafka_streams_stream_thread_metrics_punctuate_latency_max{" + by_thread + "}", - legendFormat="{{thread_id}}", + legendFormat="{{thread_id}} (max.)", ), ], legendDisplayMode="table", legendCalcs=["max", "mean", "last"], unit="ms", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + 3 + h=default_height * 2, w=ts_width, x=ts_width * 1, y=queries_base + 1 ), ), ] @@ -484,7 +436,7 @@ def dashboard( ] ## State stores: - stores_base = queries_base + 4 + stores_base = queries_base 
+ 2 stores_inner = [ G.TimeSeries( title="Put Rate", @@ -771,7 +723,7 @@ def dashboard( # build dashboard return G.Dashboard( - title="ksqlDB cluster - v2", + title="ksqlDB cluster", description="Overview of ksqlDB clusters.", tags=[ "confluent", diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py index 023f0f37..3b4db59a 100644 --- a/grafana-dashboards/schema-registry-cluster.py +++ b/grafana-dashboards/schema-registry-cluster.py @@ -223,7 +223,7 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): # build dashboard return G.Dashboard( - title="Schema Registry cluster - v2", + title="Schema Registry cluster", description="Overview of the Schema Registry cluster", tags=["confluent", "schema-registry"], inputs=[ diff --git a/grafana-dashboards/zookeeper-cluster.py b/grafana-dashboards/zookeeper-cluster.py index c0dd71d3..cea3d67b 100644 --- a/grafana-dashboards/zookeeper-cluster.py +++ b/grafana-dashboards/zookeeper-cluster.py @@ -381,7 +381,7 @@ def dashboard(env_label="namespace", server_label="pod"): panels = healthcheck_panels + system_panels + latency_panels + kafka_panels return G.Dashboard( - title="Zookeeper cluster - v2", + title="Zookeeper cluster", description="Overview of the Zookeeper cluster", tags=["confluent", "kafka", "zookeeper"], inputs=[ diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json index 4e89ac2d..866e727f 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/confluent-platform.json @@ -2675,7 +2675,7 @@ ] }, "timezone": "browser", - "title": "Confluent Platform overview - v2", + "title": "Confluent Platform overview", "uid": null, "version": 0 } diff --git 
a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json index 1c7f6dbe..330da237 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-cluster.json @@ -5531,7 +5531,7 @@ ] }, "timezone": "browser", - "title": "Kafka cluster - v2", + "title": "Kafka cluster", "uid": null, "version": 0 } diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json index d403d74c..cb9b034b 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json @@ -4148,7 +4148,7 @@ ] }, "timezone": "browser", - "title": "Kafka Connect cluster - v2", + "title": "Kafka Connect cluster", "uid": null, "version": 0 } diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json index 3aef1c22..7d19cec2 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-consumer.json @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": 
"time_series", "hide": false, "instant": false, @@ -206,7 +206,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "expr": "topk(10, kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -305,7 +305,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -396,7 +396,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "expr": "topk(10, kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -479,7 +479,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_consumer_app_info{env=\"$env\", client_id=~\"$client_id\", version!=\"\", hostname=~\"$server\"}) by (version)", + "expr": "count(kafka_consumer_app_info{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\", version!=\"\"}) by (version)", "format": "time_series", "hide": false, "instant": false, @@ -609,7 +609,7 @@ "targets": [ { "datasource": null, - "expr": 
"topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -709,7 +709,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -809,7 +809,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_lag_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -909,7 +909,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1009,7 +1009,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_avg{env=\"$env\", 
client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1023,7 +1023,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_latency_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1123,7 +1123,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1137,7 +1137,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1237,7 +1237,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1251,7 +1251,7 @@ }, { "datasource": null, - "expr": 
"topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_throttle_time_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1393,7 +1393,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1493,7 +1493,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1593,7 +1593,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1693,7 +1693,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", 
client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1707,7 +1707,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_commit_latency_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1807,7 +1807,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1821,7 +1821,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_join_time_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1921,7 +1921,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1935,7 +1935,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": 
"topk(10,kafka_consumer_consumer_coordinator_metrics_sync_time_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2035,7 +2035,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2135,7 +2135,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_heartbeat_response_time_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2235,7 +2235,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_last_heartbeat_seconds_ago{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2335,7 +2335,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_rate_per_hour{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": 
false, @@ -2349,7 +2349,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_failed_rebalance_rate_per_hour{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2449,7 +2449,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2463,7 +2463,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_rebalance_latency_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2563,7 +2563,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_coordinator_metrics_assigned_partitions{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2705,7 +2705,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": 
"topk(10,kafka_consumer_consumer_metrics_connection_count{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2805,7 +2805,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_creation_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2905,7 +2905,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_connection_close_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3005,7 +3005,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_io_ratio{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3105,7 +3105,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_ratio{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3205,7 +3205,7 @@ "targets": [ { "datasource": null, - "expr": 
"topk(10,kafka_consumer_consumer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_select_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3305,7 +3305,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_io_time_ns_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3405,7 +3405,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_metrics_io_wait_time_ns_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3547,7 +3547,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_incoming_byte_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3647,7 +3647,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_outgoing_byte_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, 
@@ -3747,7 +3747,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3761,7 +3761,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_latency_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3861,7 +3861,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_request_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3961,7 +3961,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_node_metrics_response_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4103,7 +4103,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_bytes_consumed_rate{env=\"$env\", 
client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4203,7 +4203,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_consumed_rate{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4303,7 +4303,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4317,7 +4317,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_fetch_size_max{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4417,7 +4417,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_consumer_consumer_fetch_manager_metrics_records_per_request_avg{env=\"$env\", client_type=\"consumer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -4576,7 +4576,7 @@ ] }, "timezone": "browser", - "title": "Kafka Consumer - v2", + "title": "Kafka 
Consumer", "uid": null, "version": 0 } diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json index 17359359..7b9b5c02 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-producer.json @@ -123,7 +123,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "expr": "topk(10, kafka_producer_producer_metrics_record_send_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -214,7 +214,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "expr": "topk(10, kafka_producer_producer_metrics_record_error_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -313,7 +313,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\", client_id=~\"$client_id\", hostname=~\"$server\"} > 0)", + "expr": "topk(10, kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -396,7 +396,7 @@ "targets": [ { "datasource": null, - "expr": "count(kafka_producer_app_info{env=\"$env\", client_id=~\"$client_id\", version!=\"\", hostname=~\"$server\"}) by (version)", + "expr": 
"count(kafka_producer_app_info{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\",version!=\"\"}) by (version)", "format": "time_series", "hide": false, "instant": false, @@ -526,7 +526,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_incoming_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -626,7 +626,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_outgoing_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -726,7 +726,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_metadata_age{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -826,7 +826,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_request_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -926,7 +926,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{env=\"$env\",client_id=~\"$client_id\", 
hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_requests_in_flight{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1026,7 +1026,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_records_per_request_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1126,7 +1126,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_send_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1226,7 +1226,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1326,7 +1326,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_error_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1426,7 +1426,7 @@ "targets": [ { "datasource": null, - "expr": 
"topk(10,kafka_producer_producer_metrics_record_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_size_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1440,7 +1440,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_size_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1540,7 +1540,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1554,7 +1554,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_record_queue_time_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1654,7 +1654,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ 
-1668,7 +1668,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_produce_throttle_time_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1768,7 +1768,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1782,7 +1782,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_batch_size_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1882,7 +1882,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_batch_split_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -1982,7 +1982,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_compression_rate_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": 
"time_series", "hide": false, "instant": false, @@ -2124,7 +2124,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_count{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_connection_count{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2224,7 +2224,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_connection_creation_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2324,7 +2324,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_connection_close_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2424,7 +2424,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_io_ratio{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2524,7 +2524,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_ratio{env=\"$env\", 
client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2624,7 +2624,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_select_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_select_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2724,7 +2724,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_io_time_ns_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2824,7 +2824,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_metrics_io_wait_time_ns_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -2966,7 +2966,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_incoming_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3066,7 +3066,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": 
"topk(10,kafka_producer_producer_node_metrics_outgoing_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3166,7 +3166,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_avg{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3180,7 +3180,7 @@ }, { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_request_latency_max{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3280,7 +3280,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_request_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3380,7 +3380,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_node_metrics_response_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3522,7 +3522,7 @@ "targets": [ { "datasource": null, - "expr": 
"topk(10,kafka_producer_producer_topic_metrics_byte_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_byte_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3622,7 +3622,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_compression_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3722,7 +3722,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_send_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3822,7 +3822,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_retry_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", "hide": false, "instant": false, @@ -3922,7 +3922,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{env=\"$env\",client_id=~\"$client_id\", hostname=~\"$server\"})", + "expr": "topk(10,kafka_producer_producer_topic_metrics_record_error_rate{env=\"$env\", client_type=\"producer\",hostname=~\"$server\", client_id=~\"$client_id\"})", "format": "time_series", 
"hide": false, "instant": false, @@ -4081,7 +4081,7 @@ ] }, "timezone": "browser", - "title": "Kafka Producer - v2", + "title": "Kafka Producer", "uid": null, "version": 0 } diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json index 189bcad9..a78befa6 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-quotas.json @@ -99,7 +99,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_produce_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "expr": "topk(10,kafka_server_produce_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -199,7 +199,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_fetch_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "expr": "topk(10,kafka_server_fetch_byte_rate{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -299,7 +299,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_request_request_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"})", + "expr": "topk(10,kafka_server_request_request_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"})", "format": "time_series", "hide": false, "instant": false, @@ -399,7 +399,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_produce_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "expr": 
"topk(10,kafka_server_produce_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -499,7 +499,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_fetch_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "expr": "topk(10,kafka_server_fetch_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -599,7 +599,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10,kafka_server_request_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\", hostname=~\"$broker\"} > 0)", + "expr": "topk(10,kafka_server_request_throttle_time{env=\"$env\",user=~\"$user\",client_id=~\"$client_id\",hostname=~\"$broker\"} > 0)", "format": "time_series", "hide": false, "instant": false, @@ -773,7 +773,7 @@ ] }, "timezone": "browser", - "title": "Kafka Quotas - v2", + "title": "Kafka Quotas", "uid": null, "version": 0 } diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json index 7354ae5c..0a499957 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json @@ -142,7 +142,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_messagesinpersec{env=\"$env\",topic=~\"$topic\"}[5m])))", "format": "time_series", "hide": false, "instant": false, @@ -346,7 +346,7 @@ 
"targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesinpersec{env=\"$env\",topic=~\"$topic\"}[5m])))", "format": "time_series", "hide": false, "instant": false, @@ -448,7 +448,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{topic=~\"$topic\",env=\"$env\"}[5m])))", + "expr": "topk(10, sum without(instance,pod,statefulset_kubernetes_io_pod_name) (rate(kafka_server_brokertopicmetrics_bytesoutpersec{env=\"$env\",topic=~\"$topic\"}[5m])))", "format": "time_series", "hide": false, "instant": false, @@ -550,7 +550,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalproducerequestspersec{ env=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", "format": "time_series", "hide": false, "instant": false, @@ -652,7 +652,7 @@ "targets": [ { "datasource": null, - "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ env=\"$env\", topic=~\"$topic\"}[5m])) by (topic))", + "expr": "topk(10, sum(rate(kafka_server_brokertopicmetrics_totalfetchrequestspersec{ env=\"$env\",topic=~\"$topic\"}[5m])) by (topic))", "format": "time_series", "hide": false, "instant": false, @@ -729,7 +729,7 @@ "fontSize": "100%", "gridPos": { "h": 10, - "w": 12, + "w": 24, "x": 0, "y": 3 }, @@ -762,126 +762,7 @@ "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Start Offsets", - "transformations": [ - { - "id": "organize", - "options": { - 
"excludeByName": { - "Time": true, - "__name__": true, - "app": true, - "clusterId": true, - "confluentPlatform": true, - "confluent_platform": true, - "controller_revision_hash": true, - "instance": true, - "job": true, - "namespace": true, - "platform_confluent_io_type": true, - "statefulset_kubernetes_io_pod_name": true, - "type": true - }, - "indexByName": { - "Value": 4, - "partition": 3, - "pod": 1, - "topic": 2 - }, - "renameByName": { - "Value": "offset" - } - } - }, - { - "id": "convertFieldType", - "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "partition" - } - ], - "fields": {} - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "topic" - } - ] - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "partition" - } - ] - } - } - ], - "transparent": false, - "type": "table" - }, - { - "cacheTimeout": null, - "color": { - "mode": "thresholds" - }, - "columns": [], - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "custom": { - "align": "auto", - "displayMode": "auto", - "filterable": true - }, - "thresholds": { - "mode": "absolute", - "steps": [] - } }, - "overrides": [] - }, - "fontSize": "100%", - "gridPos": { - "h": 10, - "w": 12, - "x": 12, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 10, - "interval": null, - "links": [], - "mappings": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "showHeader": true - }, - "repeat": null, - "repeatDirection": null, - "span": 6, - "targets": [ { "datasource": null, "expr": "kafka_log_log_logendoffset{env=\"$env\",topic=~\"$topic\"}", @@ -899,47 +780,27 @@ ], "timeFrom": null, "timeShift": null, - "title": "End Offsets", + "title": "Offsets", "transformations": [ { - "id": "organize", + "id": "concatenate", "options": { - "excludeByName": { - "Time": true, - "__name__": true, 
- "app": true, - "clusterId": true, - "confluentPlatform": true, - "confluent_platform": true, - "controller_revision_hash": true, - "instance": true, - "job": true, - "namespace": true, - "platform_confluent_io_type": true, - "statefulset_kubernetes_io_pod_name": true, - "type": true - }, - "indexByName": { - "Value": 4, - "partition": 3, - "pod": 1, - "topic": 2 - }, - "renameByName": { - "Value": "offset" - } + "frameNameLabel": "id", + "frameNameMode": "label" } }, { - "id": "convertFieldType", + "id": "filterFieldsByName", "options": { - "conversions": [ - { - "destinationType": "number", - "targetField": "partition" - } - ], - "fields": {} + "include": { + "names": [ + "hostname 1", + "topic 1", + "Value #A", + "Value #B", + "partition 1" + ] + } } }, { @@ -948,7 +809,7 @@ "fields": {}, "sort": [ { - "field": "topic" + "field": "partition 1" } ] } @@ -959,10 +820,32 @@ "fields": {}, "sort": [ { - "field": "partition" + "field": "topic 1" } ] } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value #A": 3, + "Value #B": 4, + "hostname 1": 0, + "id": 5, + "partition 1": 2, + "topic 1": 1 + }, + "renameByName": { + "Value #A": "start offset", + "Value #B": "end offset", + "hostname 1": "broker", + "partition 1": "", + "topic": "", + "topic 1": "" + } + } } ], "transparent": false, @@ -1079,7 +962,7 @@ ] }, "timezone": "browser", - "title": "Kafka topics - v2", + "title": "Kafka topics", "uid": null, "version": 0 } diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json index 9f98004e..adbb1df0 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json @@ -502,7 +502,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + 
"description": "A metric with constant value 1 indicating the server is up and emitting metrics.", "editable": true, "error": false, "fieldConfig": { @@ -602,7 +602,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "The number of messages consumed per second across all queries.", "editable": true, "error": false, "fieldConfig": { @@ -702,7 +702,7 @@ { "cacheTimeout": null, "datasource": "Prometheus", - "description": null, + "description": "The number of messages produced per second across all queries.", "editable": true, "error": false, "fieldConfig": { @@ -1256,98 +1256,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Poll Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 3 - }, - "height": null, - "hideTimeOverride": false, - "id": 16, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - 
"legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_streams_stream_thread_metrics_poll_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", @@ -1356,7 +1270,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (max.)", "metric": "", "refId": "", "step": 10, @@ -1365,7 +1279,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Poll Latency (Max.)", + "title": "Poll Latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -1419,12 +1333,12 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 4 + "x": 8, + "y": 3 }, "height": null, "hideTimeOverride": false, - "id": 17, + "id": 16, "interval": null, "links": [], "maxDataPoints": 100, @@ -1456,98 +1370,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Process Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": 
false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 4 - }, - "height": null, - "hideTimeOverride": false, - "id": 18, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_streams_stream_thread_metrics_process_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", @@ -1556,7 +1384,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (max.)", "metric": "", "refId": "", "step": 10, @@ -1565,7 +1393,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Process Latency (Max.)", + "title": "Process Latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -1620,11 +1448,11 @@ "h": 10, "w": 8, "x": 0, - "y": 5 + "y": 4 }, "height": null, "hideTimeOverride": false, - "id": 19, + "id": 17, "interval": null, "links": [], "maxDataPoints": 100, @@ -1656,98 +1484,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Commit Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": 
"palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 5 - }, - "height": null, - "hideTimeOverride": false, - "id": 20, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_streams_stream_thread_metrics_commit_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", @@ -1756,7 +1498,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (max.)", "metric": "", "refId": "", "step": 10, @@ -1765,7 +1507,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Commit Latency (Max.)", + "title": "Commit Latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -1819,12 +1561,12 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, - "y": 6 + "x": 8, + "y": 4 }, "height": null, "hideTimeOverride": false, - "id": 21, + "id": 18, "interval": null, "links": [], "maxDataPoints": 100, @@ -1856,98 +1598,12 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + 
"legendFormat": "{{thread_id}} (avg.)", "metric": "", "refId": "", "step": 10, "target": "" - } - ], - "timeFrom": null, - "timeShift": null, - "title": "Punctuate Latency (Avg.)", - "transformations": [], - "transparent": false, - "type": "timeseries" - }, - { - "cacheTimeout": null, - "datasource": "Prometheus", - "description": null, - "editable": true, - "error": false, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "log": 2, - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": {}, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [] - }, - "unit": "ms" - }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 8, - "x": 8, - "y": 6 - }, - "height": null, - "hideTimeOverride": false, - "id": 22, - "interval": null, - "links": [], - "maxDataPoints": 100, - "maxPerRow": null, - "minSpan": null, - "options": { - "legend": { - "calcs": [ - "max", - "mean", - "last" - ], - "displayMode": "table", - "placement": "bottom" }, - "tooltip": { - "mode": "single" - } - }, - "repeat": null, - "repeatDirection": null, - "span": null, - "targets": [ { "datasource": null, "expr": "kafka_streams_stream_thread_metrics_punctuate_latency_max{env=\"$env\",ksqldb_cluster_id=\"$ksqldb_cluster\",hostname=~\"$ksqldb_server\"thread_id=~\".+$ksqldb_cluster_id.+\"}", @@ -1956,7 +1612,7 @@ "instant": false, "interval": "", "intervalFactor": 2, - "legendFormat": "{{thread_id}}", + "legendFormat": "{{thread_id}} (max.)", "metric": "", "refId": "", "step": 10, @@ -1965,7 +1621,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Punctuate 
Latency (Max.)", + "title": "Punctuate Latency", "transformations": [], "transparent": false, "type": "timeseries" @@ -2001,11 +1657,11 @@ "h": 1, "w": 24, "x": 0, - "y": 7 + "y": 5 }, "height": null, "hideTimeOverride": false, - "id": 23, + "id": 19, "interval": null, "links": [], "maxDataPoints": 100, @@ -2062,11 +1718,11 @@ "h": 10, "w": 8, "x": 0, - "y": 7 + "y": 5 }, "height": null, "hideTimeOverride": false, - "id": 24, + "id": 20, "interval": null, "links": [], "maxDataPoints": 100, @@ -2162,11 +1818,11 @@ "h": 10, "w": 8, "x": 8, - "y": 7 + "y": 5 }, "height": null, "hideTimeOverride": false, - "id": 25, + "id": 21, "interval": null, "links": [], "maxDataPoints": 100, @@ -2262,11 +1918,11 @@ "h": 10, "w": 8, "x": 16, - "y": 7 + "y": 5 }, "height": null, "hideTimeOverride": false, - "id": 26, + "id": 22, "interval": null, "links": [], "maxDataPoints": 100, @@ -2362,11 +2018,11 @@ "h": 10, "w": 8, "x": 0, - "y": 8 + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 27, + "id": 23, "interval": null, "links": [], "maxDataPoints": 100, @@ -2462,11 +2118,11 @@ "h": 10, "w": 8, "x": 8, - "y": 8 + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 28, + "id": 24, "interval": null, "links": [], "maxDataPoints": 100, @@ -2562,11 +2218,11 @@ "h": 10, "w": 8, "x": 16, - "y": 8 + "y": 6 }, "height": null, "hideTimeOverride": false, - "id": 29, + "id": 25, "interval": null, "links": [], "maxDataPoints": 100, @@ -2662,11 +2318,11 @@ "h": 10, "w": 8, "x": 0, - "y": 9 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 30, + "id": 26, "interval": null, "links": [], "maxDataPoints": 100, @@ -2762,11 +2418,11 @@ "h": 10, "w": 8, "x": 8, - "y": 9 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 31, + "id": 27, "interval": null, "links": [], "maxDataPoints": 100, @@ -2862,11 +2518,11 @@ "h": 10, "w": 8, "x": 16, - "y": 9 + "y": 7 }, "height": null, "hideTimeOverride": false, - "id": 32, + "id": 28, "interval": null, "links": [], 
"maxDataPoints": 100, @@ -2962,11 +2618,11 @@ "h": 10, "w": 8, "x": 0, - "y": 10 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 33, + "id": 29, "interval": null, "links": [], "maxDataPoints": 100, @@ -3062,11 +2718,11 @@ "h": 10, "w": 8, "x": 8, - "y": 10 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 34, + "id": 30, "interval": null, "links": [], "maxDataPoints": 100, @@ -3162,11 +2818,11 @@ "h": 10, "w": 8, "x": 16, - "y": 10 + "y": 8 }, "height": null, "hideTimeOverride": false, - "id": 35, + "id": 31, "interval": null, "links": [], "maxDataPoints": 100, @@ -3262,11 +2918,11 @@ "h": 10, "w": 8, "x": 0, - "y": 11 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 36, + "id": 32, "interval": null, "links": [], "maxDataPoints": 100, @@ -3362,11 +3018,11 @@ "h": 10, "w": 8, "x": 8, - "y": 11 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 37, + "id": 33, "interval": null, "links": [], "maxDataPoints": 100, @@ -3462,11 +3118,11 @@ "h": 10, "w": 8, "x": 16, - "y": 11 + "y": 9 }, "height": null, "hideTimeOverride": false, - "id": 38, + "id": 34, "interval": null, "links": [], "maxDataPoints": 100, @@ -3677,7 +3333,7 @@ ] }, "timezone": "browser", - "title": "ksqlDB cluster - v2", + "title": "ksqlDB cluster", "uid": null, "version": 0 } diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json index 72402c7d..b16283ea 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json @@ -931,7 +931,7 @@ ] }, "timezone": "browser", - "title": "Schema Registry cluster - v2", + "title": "Schema Registry cluster", "uid": null, "version": 0 } diff --git 
a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json index 465129dc..d2c4c741 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/zookeeper-cluster.json @@ -1908,7 +1908,7 @@ ] }, "timezone": "browser", - "title": "Zookeeper cluster - v2", + "title": "Zookeeper cluster", "uid": null, "version": 0 } From 57a02f979e5bcc01c23cfd1146a04ddd7256e80f Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Tue, 12 Jul 2022 21:49:50 +0100 Subject: [PATCH 27/28] fix: offsets table order --- .../grafana/kafka-topics.json | 5 +- grafana-dashboards/kafka-topics.py | 94 +++++++------------ .../provisioning/dashboards/kafka-topics.json | 3 +- 3 files changed, 38 insertions(+), 64 deletions(-) diff --git a/cfk-prometheus-grafana/grafana/kafka-topics.json b/cfk-prometheus-grafana/grafana/kafka-topics.json index edbbb45b..6951e4e7 100644 --- a/cfk-prometheus-grafana/grafana/kafka-topics.json +++ b/cfk-prometheus-grafana/grafana/kafka-topics.json @@ -832,16 +832,15 @@ "indexByName": { "Value #A": 3, "Value #B": 4, - "hostname 1": 0, - "id": 5, "partition 1": 2, + "pod 1": 5, "topic 1": 1 }, "renameByName": { "Value #A": "start offset", "Value #B": "end offset", - "hostname 1": "broker", "partition 1": "", + "pod 1": "broker", "topic": "", "topic 1": "" } diff --git a/grafana-dashboards/kafka-topics.py b/grafana-dashboards/kafka-topics.py index f3e0a907..0a64cf5d 100644 --- a/grafana-dashboards/kafka-topics.py +++ b/grafana-dashboards/kafka-topics.py @@ -194,70 +194,46 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): offsets_txs = [ { - "id": "concatenate", - "options": { - "frameNameLabel": "id", - "frameNameMode": "label" - } + "id": "concatenate", + "options": {"frameNameLabel": 
"id", "frameNameMode": "label"}, }, { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - server_label+" 1", - "topic 1", - "Value #A", - "Value #B", - "partition 1" - ] - } - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "partition 1" - } - ] - } - }, - { - "id": "sortBy", - "options": { - "fields": {}, - "sort": [ - { - "field": "topic 1" - } - ] - } + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + server_label + " 1", + "topic 1", + "Value #A", + "Value #B", + "partition 1", + ] + } + }, }, + {"id": "sortBy", "options": {"fields": {}, "sort": [{"field": "partition 1"}]}}, + {"id": "sortBy", "options": {"fields": {}, "sort": [{"field": "topic 1"}]}}, { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": { - "Value #A": 3, - "Value #B": 4, - "hostname 1": 0, - "id": 5, - "partition 1": 2, - "topic 1": 1 + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "Value #A": 3, + "Value #B": 4, + server_label + " 1": 5, + "partition 1": 2, + "topic 1": 1, + }, + "renameByName": { + "Value #A": "start offset", + "Value #B": "end offset", + server_label + " 1": "broker", + "partition 1": "", + "topic": "", + "topic 1": "", + }, }, - "renameByName": { - "Value #A": "start offset", - "Value #B": "end offset", - "hostname 1": "broker", - "partition 1": "", - "topic": "", - "topic 1": "" - } - } - } + }, ] ## Offsets diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json index 0a499957..1f1ddccb 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-topics.json @@ -832,8 +832,7 @@ "indexByName": { "Value #A": 3, "Value #B": 4, - "hostname 1": 0, - "id": 5, + "hostname 1": 5, 
"partition 1": 2, "topic 1": 1 }, From 48c75954860a23ce65c9887f72c5b9b17e4db64d Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 3 Aug 2022 12:44:20 +0100 Subject: [PATCH 28/28] fix: add missing panels --- .../grafana/kafka-connect-cluster.json | 6 +- .../grafana/ksqldb-cluster.json | 2 +- .../grafana/schema-registry-cluster.json | 341 ++++++++++++++++++ grafana-dashboards/kafka-connect-cluster.py | 6 +- grafana-dashboards/ksqldb-cluster.py | 2 +- grafana-dashboards/schema-registry-cluster.py | 57 ++- .../dashboards/kafka-connect-cluster.json | 6 +- .../dashboards/ksqldb-cluster.json | 2 +- .../dashboards/schema-registry-cluster.json | 341 ++++++++++++++++++ 9 files changed, 750 insertions(+), 13 deletions(-) diff --git a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json index 39c2d217..261d085c 100644 --- a/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json +++ b/cfk-prometheus-grafana/grafana/kafka-connect-cluster.json @@ -1640,7 +1640,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Batch Size", + "title": "Batch size", "transformations": [], "transparent": false, "type": "timeseries" @@ -1694,7 +1694,7 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, + "x": 8, "y": 6 }, "height": null, @@ -1754,7 +1754,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Offset commit", + "title": "Offset commit success/failure", "transformations": [], "transparent": false, "type": "timeseries" diff --git a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json index ff7a2334..946951e4 100644 --- a/cfk-prometheus-grafana/grafana/ksqldb-cluster.json +++ b/cfk-prometheus-grafana/grafana/ksqldb-cluster.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Overview", + "title": "Cluster Overview", "transformations": [], "transparent": false, "type": "row" diff --git 
a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json index 3b0dd952..bcf9cc96 100644 --- a/cfk-prometheus-grafana/grafana/schema-registry-cluster.json +++ b/cfk-prometheus-grafana/grafana/schema-registry-cluster.json @@ -831,6 +831,347 @@ "transformations": [], "transparent": false, "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + 
"links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_kafka_schema_registry_metrics_connection_count{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": 
null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_jersey_metrics_request_rate{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "${DS_PROMETHEUS}", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_jersey_metrics_request_latency_99{namespace=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 
2, + "legendFormat": "{{pod}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency (p99)", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], "refresh": "30s", diff --git a/grafana-dashboards/kafka-connect-cluster.py b/grafana-dashboards/kafka-connect-cluster.py index 11fd70cf..1805dc75 100644 --- a/grafana-dashboards/kafka-connect-cluster.py +++ b/grafana-dashboards/kafka-connect-cluster.py @@ -661,7 +661,7 @@ def dashboard( tasks_base = worker_base + 1 tasks_inner = [ G.TimeSeries( - title="Batch Size", + title="Batch size", description="Maximum and average size of the batches processed by the connector task.", dataSource=ds, targets=[ @@ -686,7 +686,7 @@ def dashboard( ), ), G.TimeSeries( - title="Offset commit", + title="Offset commit success/failure", description="Percentage of offset commit successful and failed.", dataSource=ds, targets=[ @@ -707,7 +707,7 @@ def dashboard( legendCalcs=["max", "mean", "last"], unit="percentunit", gridPos=G.GridPos( - h=default_height * 2, w=ts_width, x=ts_width * 0, y=tasks_base + h=default_height * 2, w=ts_width, x=ts_width * 1, y=tasks_base ), ), G.TimeSeries( diff --git a/grafana-dashboards/ksqldb-cluster.py b/grafana-dashboards/ksqldb-cluster.py index d2e5adb8..22755a90 100644 --- a/grafana-dashboards/ksqldb-cluster.py +++ b/grafana-dashboards/ksqldb-cluster.py @@ -90,7 +90,7 @@ def dashboard( overview_base = 0 overview_panels = [ G.RowPanel( - title="Overview", + title="Cluster Overview", gridPos=G.GridPos(h=1, w=24, x=0, y=overview_base), ), # First layer diff --git a/grafana-dashboards/schema-registry-cluster.py b/grafana-dashboards/schema-registry-cluster.py index 3b4db59a..1b61e38b 100644 --- a/grafana-dashboards/schema-registry-cluster.py +++ b/grafana-dashboards/schema-registry-cluster.py @@ -218,8 +218,63 @@ def dashboard(ds="Prometheus", env_label="namespace", server_label="pod"): ), ] + + 
request_panels = [ + G.RowPanel( + title="Requests", + gridPos=G.GridPos(h=1, w=24, x=0, y=2), + ), + G.TimeSeries( + title="Connections", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_schema_registry_kafka_schema_registry_metrics_connection_count{" + + by_env + +"}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 0, y=2), + ), + G.TimeSeries( + title="Request Rate", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_schema_registry_jersey_metrics_request_rate{" + + by_env + +"}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="reqps", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 1, y=2), + ), + G.TimeSeries( + title="Request Latency (p99)", + dataSource=ds, + targets=[ + G.Target( + expr="kafka_schema_registry_jersey_metrics_request_latency_99{" + + by_env + +"}", + legendFormat="{{" + server_label + "}}", + ), + ], + legendDisplayMode="table", + legendCalcs=["max", "mean", "last"], + unit="ms", + gridPos=G.GridPos(h=default_height * 2, w=ts_width, x=ts_width * 2, y=2), + ), + ] + # group all panels - panels = healthcheck_panels + system_panels + panels = healthcheck_panels + system_panels + request_panels # build dashboard return G.Dashboard( diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json index cb9b034b..1987f361 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/kafka-connect-cluster.json @@ -1640,7 +1640,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Batch Size", + "title": "Batch size", "transformations": 
[], "transparent": false, "type": "timeseries" @@ -1694,7 +1694,7 @@ "gridPos": { "h": 10, "w": 8, - "x": 0, + "x": 8, "y": 6 }, "height": null, @@ -1754,7 +1754,7 @@ ], "timeFrom": null, "timeShift": null, - "title": "Offset commit", + "title": "Offset commit success/failure", "transformations": [], "transparent": false, "type": "timeseries" diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json index adbb1df0..91fa6305 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/ksqldb-cluster.json @@ -55,7 +55,7 @@ "targets": [], "timeFrom": null, "timeShift": null, - "title": "Overview", + "title": "Cluster Overview", "transformations": [], "transparent": false, "type": "row" diff --git a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json index b16283ea..cd99a568 100644 --- a/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json +++ b/jmxexporter-prometheus-grafana/assets/grafana/provisioning/dashboards/schema-registry-cluster.json @@ -831,6 +831,347 @@ "transformations": [], "transparent": false, "type": "timeseries" + }, + { + "cacheTimeout": null, + "collapsed": false, + "datasource": null, + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "thresholds": { + "mode": "absolute", + "steps": [] + } + } + }, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 11, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "panels": [], + "repeat": null, + 
"repeatDirection": null, + "span": null, + "targets": [], + "timeFrom": null, + "timeShift": null, + "title": "Requests", + "transformations": [], + "transparent": false, + "type": "row" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 12, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_kafka_schema_registry_metrics_connection_count{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Connections", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, 
+ "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "reqps" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 13, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_jersey_metrics_request_rate{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Rate", + "transformations": [], + "transparent": false, + "type": "timeseries" + }, + { + "cacheTimeout": null, + "datasource": "Prometheus", + "description": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "log": 2, + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": {}, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [] + }, + "unit": "ms" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 2 + }, + "height": null, + "hideTimeOverride": false, + "id": 14, + "interval": null, + "links": [], + "maxDataPoints": 100, + "maxPerRow": null, + "minSpan": null, + "options": { + "legend": { + "calcs": [ + "max", + "mean", + "last" + ], + "displayMode": "table", + "placement": "bottom" + }, + "tooltip": { + "mode": "single" + } + }, + "repeat": null, + "repeatDirection": null, + "span": null, + "targets": [ + { + "datasource": null, + "expr": "kafka_schema_registry_jersey_metrics_request_latency_99{env=\"$env\"}", + "format": "time_series", + "hide": false, + "instant": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{hostname}}", + "metric": "", + "refId": "", + "step": 10, + "target": "" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Request Latency (p99)", + "transformations": [], + "transparent": false, + "type": "timeseries" } ], "refresh": "30s",