Skip to content

Commit d816af8

Browse files
committed
storeliveness: add batching metrics to storeliveness transport
Previously, the only batching-related metric on the storeliveness transport was a simple counter showing how many aggregate storeliveness messages have been sent. To verify paced storeliveness heartbeats, we need to compare the same metrics against the baseline to see whether the changes introduced by pacing have a negative impact on heartbeat sending, particularly with respect to batching. This commit adds the same batching metrics that are present in paced storeliveness heartbeats. Since this commit only introduces metrics, it has no impact on current behaviour.
Fixes: None
Release note: None
1 parent 9c4252b commit d816af8

File tree

4 files changed

+38
-1
lines changed

4 files changed

+38
-1
lines changed

docs/generated/metrics/metrics.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17365,6 +17365,22 @@ layers:
1736517365
unit: COUNT
1736617366
aggregation: AVG
1736717367
derivative: NON_NEGATIVE_DERIVATIVE
17368+
- name: storeliveness.transport.batches-received
17369+
exported_name: storeliveness_transport_batches_received
17370+
description: Number of message batches received by the Store Liveness Transport
17371+
y_axis_label: Batches
17372+
type: COUNTER
17373+
unit: COUNT
17374+
aggregation: AVG
17375+
derivative: NON_NEGATIVE_DERIVATIVE
17376+
- name: storeliveness.transport.batches-sent
17377+
exported_name: storeliveness_transport_batches_sent
17378+
description: Number of message batches sent by the Store Liveness Transport
17379+
y_axis_label: Batches
17380+
type: COUNTER
17381+
unit: COUNT
17382+
aggregation: AVG
17383+
derivative: NON_NEGATIVE_DERIVATIVE
1736817384
- name: storeliveness.transport.receive-queue-bytes
1736917385
exported_name: storeliveness_transport_receive_queue_bytes
1737017386
description: Total byte size of pending incoming messages from Store Liveness Transport

pkg/kv/kvserver/storeliveness/metrics.go

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ type TransportMetrics struct {
2828
MessagesReceived *metric.Counter
2929
MessagesSendDropped *metric.Counter
3030
MessagesReceiveDropped *metric.Counter
31+
32+
BatchesSent *metric.Counter
33+
BatchesReceived *metric.Counter
3134
}
3235

3336
func newTransportMetrics() *TransportMetrics {
@@ -39,6 +42,8 @@ func newTransportMetrics() *TransportMetrics {
3942
MessagesReceived: metric.NewCounter(metaMessagesReceived),
4043
MessagesSendDropped: metric.NewCounter(metaMessagesSendDropped),
4144
MessagesReceiveDropped: metric.NewCounter(metaMessagesReceiveDropped),
45+
BatchesSent: metric.NewCounter(metaBatchesSent),
46+
BatchesReceived: metric.NewCounter(metaBatchesReceived),
4247
}
4348
}
4449

@@ -201,11 +206,22 @@ var (
201206
Measurement: "Bytes",
202207
Unit: metric.Unit_BYTES,
203208
}
204-
205209
metaCallbacksProcessingDuration = metric.Metadata{
206210
Name: "storeliveness.callbacks.processing_duration",
207211
Help: "Duration of support withdrawal callback processing",
208212
Measurement: "Duration",
209213
Unit: metric.Unit_NANOSECONDS,
210214
}
215+
metaBatchesSent = metric.Metadata{
216+
Name: "storeliveness.transport.batches-sent",
217+
Help: "Number of message batches sent by the Store Liveness Transport",
218+
Measurement: "Batches",
219+
Unit: metric.Unit_COUNT,
220+
}
221+
metaBatchesReceived = metric.Metadata{
222+
Name: "storeliveness.transport.batches-received",
223+
Help: "Number of message batches received by the Store Liveness Transport",
224+
Measurement: "Batches",
225+
Unit: metric.Unit_COUNT,
226+
}
211227
)

pkg/kv/kvserver/storeliveness/transport.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,7 @@ func (t *Transport) stream(stream slpb.RPCStoreLiveness_StreamStream) error {
168168
if err != nil {
169169
return err
170170
}
171+
t.metrics.BatchesReceived.Inc(1)
171172
if !batch.Now.IsEmpty() {
172173
t.clock.Update(batch.Now)
173174
}
@@ -397,6 +398,8 @@ func (t *Transport) processQueue(
397398
t.metrics.MessagesSendDropped.Inc(int64(len(batch.Messages)))
398399
return err
399400
}
401+
402+
t.metrics.BatchesSent.Inc(1)
400403
t.metrics.MessagesSent.Inc(int64(len(batch.Messages)))
401404

402405
// Reuse the Messages slice, but zero out the contents to avoid delaying

pkg/roachprod/agents/opentelemetry/cockroachdb_metrics.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2446,6 +2446,8 @@ var cockroachdbMetrics = map[string]string{
24462446
"storeliveness_support_from_stores": "storeliveness.support_from.stores",
24472447
"storeliveness_support_withdraw_failures": "storeliveness.support_withdraw.failures",
24482448
"storeliveness_support_withdraw_successes": "storeliveness.support_withdraw.successes",
2449+
"storeliveness_transport_batches_received": "storeliveness.transport.batches_received",
2450+
"storeliveness_transport_batches_sent": "storeliveness.transport.batches_sent",
24492451
"storeliveness_transport_receive_dropped": "storeliveness.transport.receive_dropped",
24502452
"storeliveness_transport_receive_queue_bytes": "storeliveness.transport.receive_queue_bytes",
24512453
"storeliveness_transport_receive_queue_size": "storeliveness.transport.receive_queue_size",

0 commit comments

Comments
 (0)