Skip to content

Commit a79bf8e

Browse files
committed
asim: use kvserver.LoadBasedRebalancingObjective
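Previously, asim carried its own LBRebalancingObjective field in the simulation settings, which defaulted to QPS as the load rebalancing objective. However, the cluster setting kvserver.LoadBasedRebalancingObjective defaults to CPU rebalancing instead. This commit removes the simulation-specific field and updates asim to read the cluster setting directly, so simulations now rebalance on CPU by default unless the cluster setting is explicitly overridden (as the store rebalancer unit tests now do to keep balancing on QPS).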
1 parent 8f19f41 commit a79bf8e

File tree

9 files changed

+78
-39
lines changed

9 files changed

+78
-39
lines changed

pkg/kv/kvserver/asim/config/settings.go

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,6 @@ const (
2929
defaultSplitStatRetention = 10 * time.Minute
3030
defaultSeed = 42
3131
defaultLBRebalancingInterval = time.Minute
32-
defaultLBRebalanceQPSThreshold = 0.1
33-
defaultLBRebalancingObjective = 0 // QPS
3432
)
3533

3634
const DefaultNodeCPURateCapacityNanos = 8 * 1e9 // 8 vcpus
@@ -101,8 +99,6 @@ type SimulationSettings struct {
10199
// SplitStatRetention is the duration which recorded load will be retained
102100
// and factored into load based splitting decisions.
103101
SplitStatRetention time.Duration
104-
// LBRebalancingObjective is the load objective to balance.
105-
LBRebalancingObjective int64
106102
// LBRebalancingInterval controls how often the store rebalancer will
107103
// consider opportunities for rebalancing.
108104
LBRebalancingInterval time.Duration
@@ -141,7 +137,6 @@ func DefaultSimulationSettings() *SimulationSettings {
141137
StateExchangeDelay: defaultStateExchangeDelay,
142138
SplitQPSThreshold: defaultSplitQPSThreshold,
143139
SplitStatRetention: defaultSplitStatRetention,
144-
LBRebalancingObjective: defaultLBRebalancingObjective,
145140
LBRebalancingInterval: defaultLBRebalancingInterval,
146141
ReplicateQueueEnabled: true,
147142
LeaseQueueEnabled: true,

pkg/kv/kvserver/asim/storerebalancer/store_rebalancer.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,11 +126,11 @@ type simRebalanceObjectiveProvider struct {
126126

127127
// Objective returns the current rebalance objective.
128128
func (s simRebalanceObjectiveProvider) Objective() kvserver.LBRebalancingObjective {
129-
return kvserver.LBRebalancingObjective(s.settings.LBRebalancingObjective)
129+
return kvserver.LoadBasedRebalancingObjective.Get(&s.settings.ST.SV)
130130
}
131131

132132
func (src *storeRebalancerControl) scorerOptions() *allocatorimpl.LoadScorerOptions {
133-
dim := kvserver.LBRebalancingObjective(src.settings.LBRebalancingObjective).ToDimension()
133+
dim := kvserver.LoadBasedRebalancingObjective.Get(&src.settings.ST.SV).ToDimension()
134134
return &allocatorimpl.LoadScorerOptions{
135135
BaseScorerOptions: allocatorimpl.BaseScorerOptions{
136136
IOOverload: src.allocator.IOOverloadOptions(),
@@ -191,7 +191,7 @@ func (src *storeRebalancerControl) phasePrologue(
191191
ctx, src.scorerOptions(),
192192
hottestRanges(
193193
s, src.storeID,
194-
kvserver.LBRebalancingObjective(src.settings.LBRebalancingObjective).ToDimension(),
194+
kvserver.LoadBasedRebalancingObjective.Get(&src.settings.ST.SV).ToDimension(),
195195
),
196196
kvserverbase.LoadBasedRebalancingMode.Get(&src.settings.ST.SV),
197197
)

pkg/kv/kvserver/asim/storerebalancer/store_rebalancer_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"testing"
1111
"time"
1212

13+
"github.com/cockroachdb/cockroach/pkg/kv/kvserver"
1314
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/config"
1415
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/gossip"
1516
"github.com/cockroachdb/cockroach/pkg/kv/kvserver/asim/op"
@@ -36,6 +37,8 @@ func TestStoreRebalancer(t *testing.T) {
3637
start := testSettings.StartTime
3738
testSettings.ReplicaChangeBaseDelay = 5 * time.Second
3839
testSettings.StateExchangeDelay = 0
40+
ctx := context.Background()
41+
kvserver.LoadBasedRebalancingObjective.Override(ctx, &testSettings.ST.SV, kvserver.LBRebalancingQueries)
3942

4043
clusterInfo := state.ClusterInfoWithStoreCount(6, 1 /* storesPerNode */)
4144

@@ -195,6 +198,8 @@ func TestStoreRebalancerBalances(t *testing.T) {
195198
testSettings.ReplicaChangeBaseDelay = 1 * time.Second
196199
testSettings.StateExchangeInterval = 1 * time.Second
197200
testSettings.StateExchangeDelay = 0
201+
ctx := context.Background()
202+
kvserver.LoadBasedRebalancingObjective.Override(ctx, &testSettings.ST.SV, kvserver.LBRebalancingQueries)
198203

199204
distributeQPS := func(s state.State, qpsCounts map[state.StoreID]float64) {
200205
dist := make([]float64, len(qpsCounts))

pkg/kv/kvserver/asim/tests/datadriven_simulation_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,6 @@ func TestDataDriven(t *testing.T) {
724724
dns = scanIfExists(t, d, "rebalance_range_threshold", &settingsGen.Settings.RangeRebalanceThreshold) || dns
725725
dns = scanIfExists(t, d, "gossip_delay", &settingsGen.Settings.StateExchangeDelay) || dns
726726
dns = scanIfExists(t, d, "range_size_split_threshold", &settingsGen.Settings.RangeSizeSplitThreshold) || dns
727-
dns = scanIfExists(t, d, "rebalance_objective", &settingsGen.Settings.LBRebalancingObjective) || dns
728727
var snapshotRateMiB int
729728
dns = scanIfExists(t, d, "rebalancing_snapshot_rate_mib", &snapshotRateMiB) || dns
730729
if snapshotRateMiB != 0 {

pkg/kv/kvserver/asim/tests/testdata/generated/sma/rebalancing/rebalancing_sma-count_1.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

pkg/kv/kvserver/asim/tests/testdata/generated/sma/rebalancing/rebalancing_sma-count_2.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

pkg/kv/kvserver/asim/tests/testdata/non_rand/mma/skewed_cpu_even_ranges_mma.txt

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -44,19 +44,19 @@ setting split_queue_enabled=false
4444
# load around. Possibly a bug?
4545
eval duration=22m samples=1 seed=42 cfgs=(sma-count,mma-only,mma-count) metrics=(cpu,cpu_util,leases,replicas,write_bytes_per_second)
4646
----
47-
cpu#1: last: [s1=983823267, s2=1123530532, s3=703189464, s4=419443481, s5=432770664, s6=559726455, s7=141944069, s8=282935590, s9=425381126] (stddev=302430545.53, mean=563638294.22, sum=5072744648)
48-
cpu#1: thrash_pct: [s1=13%, s2=49%, s3=46%, s4=8%, s5=9%, s6=10%, s7=4%, s8=13%, s9=24%] (sum=176%)
49-
cpu_util#1: last: [s1=0.20, s2=0.22, s3=0.14, s4=0.08, s5=0.09, s6=0.11, s7=0.03, s8=0.06, s9=0.09] (stddev=0.06, mean=0.11, sum=1)
50-
cpu_util#1: thrash_pct: [s1=13%, s2=49%, s3=46%, s4=8%, s5=9%, s6=10%, s7=4%, s8=13%, s9=24%] (sum=176%)
47+
cpu#1: last: [s1=563639640, s2=579095747, s3=561019502, s4=561591114, s5=560289458, s6=563986913, s7=561058911, s8=559174716, s9=561248205] (stddev=5708749.39, mean=563456022.89, sum=5071104206)
48+
cpu#1: thrash_pct: [s1=61%, s2=93%, s3=85%, s4=33%, s5=25%, s6=31%, s7=26%, s8=31%, s9=38%] (sum=423%)
49+
cpu_util#1: last: [s1=0.11, s2=0.12, s3=0.11, s4=0.11, s5=0.11, s6=0.11, s7=0.11, s8=0.11, s9=0.11] (stddev=0.00, mean=0.11, sum=1)
50+
cpu_util#1: thrash_pct: [s1=61%, s2=93%, s3=85%, s4=33%, s5=25%, s6=31%, s7=26%, s8=31%, s9=38%] (sum=423%)
5151
leases#1: first: [s1=36, s2=0, s3=0, s4=36, s5=0, s6=0, s7=36, s8=0, s9=0] (stddev=16.97, mean=12.00, sum=108)
52-
leases#1: last: [s1=13, s2=12, s3=12, s4=17, s5=11, s6=9, s7=15, s8=10, s9=9] (stddev=2.54, mean=12.00, sum=108)
53-
leases#1: thrash_pct: [s1=53%, s2=38%, s3=44%, s4=21%, s5=20%, s6=26%, s7=15%, s8=26%, s9=37%] (sum=281%)
52+
leases#1: last: [s1=8, s2=12, s3=10, s4=17, s5=11, s6=15, s7=15, s8=12, s9=8] (stddev=2.98, mean=12.00, sum=108)
53+
leases#1: thrash_pct: [s1=90%, s2=91%, s3=78%, s4=64%, s5=44%, s6=33%, s7=40%, s8=38%, s9=66%] (sum=544%)
5454
replicas#1: first: [s1=36, s2=36, s3=36, s4=36, s5=36, s6=36, s7=36, s8=36, s9=36] (stddev=0.00, mean=36.00, sum=324)
55-
replicas#1: last: [s1=36, s2=37, s3=33, s4=37, s5=36, s6=36, s7=36, s8=35, s9=38] (stddev=1.33, mean=36.00, sum=324)
56-
replicas#1: thrash_pct: [s1=433%, s2=402%, s3=672%, s4=135%, s5=117%, s6=217%, s7=117%, s8=218%, s9=403%] (sum=2713%)
57-
write_bytes_per_second#1: last: [s1=5620, s2=5631, s3=5274, s4=6278, s5=6149, s6=6141, s7=6163, s8=5851, s9=6287] (stddev=336.21, mean=5932.67, sum=53394)
58-
write_bytes_per_second#1: thrash_pct: [s1=559%, s2=643%, s3=817%, s4=499%, s5=526%, s6=579%, s7=550%, s8=547%, s9=686%] (sum=5406%)
59-
artifacts[sma-count]: f4911f910a2a6031
55+
replicas#1: last: [s1=36, s2=38, s3=34, s4=36, s5=34, s6=36, s7=36, s8=38, s9=36] (stddev=1.33, mean=36.00, sum=324)
56+
replicas#1: thrash_pct: [s1=814%, s2=660%, s3=474%, s4=343%, s5=288%, s6=271%, s7=229%, s8=146%, s9=486%] (sum=3711%)
57+
write_bytes_per_second#1: last: [s1=5732, s2=6066, s3=5520, s4=6053, s5=5656, s6=5984, s7=6012, s8=6348, s9=5994] (stddev=236.75, mean=5929.44, sum=53365)
58+
write_bytes_per_second#1: thrash_pct: [s1=1071%, s2=1051%, s3=901%, s4=774%, s5=695%, s6=717%, s7=684%, s8=692%, s9=916%] (sum=7501%)
59+
artifacts[sma-count]: 9f6f3c953198a939
6060
==========================
6161
cpu#1: last: [s1=569562730, s2=572002191, s3=578085131, s4=557945087, s5=554889308, s6=555817692, s7=569231042, s8=555128356, s9=558243133] (stddev=8275220.45, mean=563433852.22, sum=5070904670)
6262
cpu#1: thrash_pct: [s1=9%, s2=76%, s3=61%, s4=9%, s5=22%, s6=9%, s7=10%, s8=11%, s9=17%] (sum=225%)

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/multi_store_lease.txt

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,17 @@ asserting: |leases(t)/mean_{T}(leases) - 1| = 0.00 ∀ t∈T and each store (T=l
2424
eval duration=5m seed=42 metrics=(leases) cfgs=(sma-count,mma-only)
2525
----
2626
leases#1: first: [s1=8, s2=3, s3=0, s4=0, s5=0, s6=0, s7=0, s8=0, s9=1, s10=0, s11=1, s12=0, s13=1, s14=0] (stddev=2.10, mean=1.00, sum=14)
27-
leases#1: last: [s1=1, s2=1, s3=1, s4=1, s5=1, s6=1, s7=1, s8=1, s9=1, s10=1, s11=1, s12=1, s13=1, s14=1] (stddev=0.00, mean=1.00, sum=14)
28-
leases#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%, s8=25%, s9=25%, s10=0%, s11=25%, s12=0%, s13=50%, s14=25%] (sum=150%)
29-
artifacts[sma-count]: 8eaf9b7414b3c382
27+
leases#1: last: [s1=4, s2=2, s3=0, s4=0, s5=0, s6=0, s7=1, s8=0, s9=1, s10=1, s11=2, s12=1, s13=1, s14=1] (stddev=1.07, mean=1.00, sum=14)
28+
leases#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%, s8=0%, s9=25%, s10=0%, s11=0%, s12=0%, s13=50%, s14=25%] (sum=100%)
29+
artifacts[sma-count]: ad563e290094f1bf
30+
failed assertion sample 1
31+
balance stat=leases threshold=(≤1.00) ticks=6
32+
max/mean=4.00 tick=0
33+
max/mean=4.00 tick=1
34+
max/mean=4.00 tick=2
35+
max/mean=4.00 tick=3
36+
max/mean=4.00 tick=4
37+
max/mean=4.00 tick=5
3038
==========================
3139
leases#1: first: [s1=8, s2=3, s3=0, s4=0, s5=0, s6=0, s7=0, s8=0, s9=1, s10=0, s11=1, s12=0, s13=1, s14=0] (stddev=2.10, mean=1.00, sum=14)
3240
leases#1: last: [s1=3, s2=1, s3=0, s4=0, s5=0, s6=1, s7=1, s8=1, s9=2, s10=0, s11=2, s12=1, s13=2, s14=0] (stddev=0.93, mean=1.00, sum=14)

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/rebalancing.txt

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,31 @@ asserting: |qps(t)/mean_{T}(qps) - 1| ≤ 0.05 ∀ t∈T and each store (T=last
4242
# printed. If no assertions fail, then OK is printed.
4343
eval duration=3m samples=2 seed=42 metrics=(qps,replica_moves) cfgs=(sma-count,mma-only) full=true
4444
----
45-
qps#1: last: [s1=996, s2=1001, s3=999, s4=994, s5=1001, s6=1002, s7=1007] (stddev=3.93, mean=1000.00, sum=7000)
46-
qps#1: thrash_pct: [s1=12%, s2=9%, s3=4%, s4=8%, s5=5%, s6=6%, s7=4%] (sum=48%)
47-
replica_moves#1: last: [s1=2, s2=1, s3=0, s4=1, s5=1, s6=1, s7=0] (stddev=0.64, mean=0.86, sum=6)
45+
qps#1: last: [s1=2997, s2=1998, s3=0, s4=0, s5=1001, s6=1002, s7=0] (stddev=1067.98, mean=999.71, sum=6998)
46+
qps#1: thrash_pct: [s1=8%, s2=8%, s3=0%, s4=0%, s5=5%, s6=6%, s7=0%] (sum=27%)
47+
replica_moves#1: last: [s1=2, s2=1, s3=0, s4=0, s5=1, s6=1, s7=0] (stddev=0.70, mean=0.71, sum=5)
4848
replica_moves#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%] (sum=0%)
49-
qps#2: last: [s1=999, s2=999, s3=997, s4=998, s5=997, s6=1002, s7=998] (stddev=1.59, mean=998.57, sum=6990)
50-
qps#2: thrash_pct: [s1=11%, s2=8%, s3=4%, s4=103%, s5=4%, s6=105%, s7=5%] (sum=240%)
51-
replica_moves#2: last: [s1=2, s2=1, s3=0, s4=1, s5=0, s6=1, s7=0] (stddev=0.70, mean=0.71, sum=5)
49+
qps#2: last: [s1=2997, s2=2000, s3=0, s4=0, s5=0, s6=2001, s7=0] (stddev=1194.63, mean=999.71, sum=6998)
50+
qps#2: thrash_pct: [s1=7%, s2=7%, s3=0%, s4=0%, s5=0%, s6=11%, s7=0%] (sum=25%)
51+
replica_moves#2: last: [s1=2, s2=1, s3=0, s4=0, s5=0, s6=1, s7=0] (stddev=0.73, mean=0.57, sum=4)
5252
replica_moves#2: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%] (sum=0%)
53-
artifacts[sma-count]: df2fd03851620873
53+
artifacts[sma-count]: a725af49d5bb0f06
54+
failed assertion sample 1
55+
balance stat=qps threshold=(≤1.15) ticks=6
56+
max/mean=3.00 tick=0
57+
max/mean=3.00 tick=1
58+
max/mean=3.00 tick=2
59+
max/mean=3.00 tick=3
60+
max/mean=3.00 tick=4
61+
max/mean=3.00 tick=5
62+
failed assertion sample 2
63+
balance stat=qps threshold=(≤1.15) ticks=6
64+
max/mean=3.00 tick=0
65+
max/mean=3.00 tick=1
66+
max/mean=3.00 tick=2
67+
max/mean=3.00 tick=3
68+
max/mean=3.00 tick=4
69+
max/mean=3.00 tick=5
5470
==========================
5571
qps#1: last: [s1=3998, s2=1998, s3=0, s4=0, s5=0, s6=1002, s7=0] (stddev=1413.41, mean=999.71, sum=6998)
5672
qps#1: thrash_pct: [s1=5%, s2=6%, s3=0%, s4=0%, s5=0%, s6=4%, s7=0%] (sum=15%)
@@ -108,15 +124,31 @@ setting gossip_delay=20s
108124
# thrashing on the fourth sample here.
109125
eval duration=5m samples=2 seed=42 metrics=(qps,replica_moves) cfgs=(sma-count,mma-only)
110126
----
111-
qps#1: last: [s1=994, s2=999, s3=1005, s4=1000, s5=998, s6=1002, s7=999] (stddev=3.16, mean=999.57, sum=6997)
112-
qps#1: thrash_pct: [s1=117%, s2=59%, s3=54%, s4=53%, s5=104%, s6=105%, s7=53%] (sum=543%)
113-
replica_moves#1: last: [s1=5, s2=4, s3=0, s4=4, s5=3, s6=5, s7=2] (stddev=1.67, mean=3.29, sum=23)
127+
qps#1: last: [s1=2004, s2=0, s3=0, s4=0, s5=3001, s6=0, s7=1994] (stddev=1195.23, mean=999.86, sum=6999)
128+
qps#1: thrash_pct: [s1=115%, s2=3%, s3=0%, s4=0%, s5=8%, s6=55%, s7=8%] (sum=189%)
129+
replica_moves#1: last: [s1=5, s2=4, s3=0, s4=0, s5=4, s6=4, s7=2] (stddev=1.91, mean=2.71, sum=19)
114130
replica_moves#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%] (sum=0%)
115-
qps#2: last: [s1=1001, s2=1003, s3=994, s4=997, s5=1002, s6=1001, s7=998] (stddev=2.97, mean=999.43, sum=6996)
116-
qps#2: thrash_pct: [s1=256%, s2=47%, s3=43%, s4=2%, s5=284%, s6=126%, s7=125%] (sum=882%)
117-
replica_moves#2: last: [s1=12, s2=7, s3=1, s4=0, s5=10, s6=8, s7=3] (stddev=4.26, mean=5.86, sum=41)
131+
qps#2: last: [s1=1001, s2=0, s3=0, s4=1001, s5=0, s6=3003, s7=1995] (stddev=1069.18, mean=1000.00, sum=7000)
132+
qps#2: thrash_pct: [s1=269%, s2=2%, s3=0%, s4=3%, s5=258%, s6=8%, s7=60%] (sum=600%)
133+
replica_moves#2: last: [s1=11, s2=4, s3=0, s4=0, s5=9, s6=12, s7=3] (stddev=4.69, mean=5.57, sum=39)
118134
replica_moves#2: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%] (sum=0%)
119-
artifacts[sma-count]: 1bd16e771629608b
135+
artifacts[sma-count]: 83a77bd5552b8bfe
136+
failed assertion sample 1
137+
balance stat=qps threshold=(≤1.15) ticks=6
138+
max/mean=3.00 tick=0
139+
max/mean=3.00 tick=1
140+
max/mean=3.00 tick=2
141+
max/mean=3.00 tick=3
142+
max/mean=3.00 tick=4
143+
max/mean=3.00 tick=5
144+
failed assertion sample 2
145+
balance stat=qps threshold=(≤1.15) ticks=6
146+
max/mean=3.00 tick=0
147+
max/mean=3.00 tick=1
148+
max/mean=3.00 tick=2
149+
max/mean=3.00 tick=3
150+
max/mean=3.00 tick=4
151+
max/mean=3.00 tick=5
120152
==========================
121153
qps#1: last: [s1=4002, s2=1995, s3=0, s4=0, s5=0, s6=1002, s7=0] (stddev=1414.32, mean=999.86, sum=6999)
122154
qps#1: thrash_pct: [s1=6%, s2=6%, s3=0%, s4=0%, s5=0%, s6=4%, s7=0%] (sum=16%)

0 commit comments

Comments
 (0)