Skip to content

Commit 5fa2c3b

Browse files
committed
asim: make LoadBasedRebalancingObjective configurable
Previously, the simulator was updated to use the cluster setting LoadBasedRebalancingObjective, which defaults to CPU-based rebalancing. However, the goal of rebalancing_qps.txt is to rebalance based on QPS. This commit makes LoadBasedRebalancingObjective a configurable simulation setting that can be overridden like other cluster settings. It also updates rebalancing_qps.txt to explicitly set the rebalancing objective to QPS, restoring the test's original intent after the recent simulator change.
1 parent 82b154f commit 5fa2c3b

File tree

3 files changed

+33
-51
lines changed

3 files changed

+33
-51
lines changed

pkg/kv/kvserver/asim/state/impl.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,6 +1402,8 @@ func (s *state) SetClusterSetting(Key string, Value interface{}) {
14021402
switch Key {
14031403
case "LBRebalancingMode":
14041404
kvserverbase.LoadBasedRebalancingMode.Override(context.Background(), &s.settings.ST.SV, kvserverbase.LBRebalancingMode(Value.(int64)))
1405+
case "LBRebalancingObjective":
1406+
kvserver.LoadBasedRebalancingObjective.Override(context.Background(), &s.settings.ST.SV, kvserver.LBRebalancingObjective(Value.(int64)))
14051407
default:
14061408
panic("other cluster settings not supported")
14071409
}

pkg/kv/kvserver/asim/tests/datadriven_simulation_test.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -745,6 +745,17 @@ func TestDataDriven(t *testing.T) {
745745
Value: rebalanceMode,
746746
}})
747747
}
748+
749+
var rebalanceObjective int64
750+
if scanIfExists(t, d, "rebalance_objective", &rebalanceObjective) {
751+
events = append(events, scheduled.ScheduledEvent{
752+
At: settingsGen.Settings.StartTime.Add(delay),
753+
TargetEvent: event.SetSimulationSettingsEvent{
754+
IsClusterSetting: true,
755+
Key: "LBRebalancingObjective",
756+
Value: rebalanceObjective,
757+
}})
758+
}
748759
return ""
749760
default:
750761
return fmt.Sprintf("unknown command: %s", d.Cmd)

pkg/kv/kvserver/asim/tests/testdata/non_rand/sma/rebalancing_qps.txt

Lines changed: 20 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
# This test verifies that the allocator can rebalance qps load across a 7-node
22
# cluster. mma-only fails here since it doesn't balance based on leases.
3-
# TODO(tbg): this test works because the default rebalancing objective in asim
4-
# was accidentally left as 'qps' (even though we use 'cpu' in production).
5-
# When the default changes, this test needs to override the setting and probably
6-
# rename the test to specify that it's about qps rebalancing in particular,
7-
# which otherwise sees little explicit testing.
83
skip_under_ci
94
----
105

@@ -35,38 +30,27 @@ assertion stat=qps type=steady ticks=6 upper_bound=0.05
3530
----
3631
asserting: |qps(t)/mean_{T}(qps) - 1| ≤ 0.05 ∀ t∈T and each store (T=last 6 ticks)
3732

33+
34+
# Set the rebalance objective to 0 (QPS) so that load-based rebalancing balances on QPS instead of the default CPU objective.
35+
setting rebalance_objective=0
36+
----
37+
3838
# The generators are then called and 2 simulation runs, named samples, are
3939
# created and evaluated. Each sample has a fixed duration of 3 minutes.
4040
# Following the evaluation, the samples are checked individually against the
4141
# existing assertions, added above. If any assertion fails, the reason is
4242
# printed. If no assertions fail, then OK is printed.
4343
eval duration=3m samples=2 seed=42 metrics=(qps,replica_moves) cfgs=(sma-count,mma-only) full=true
4444
----
45-
qps#1: last: [s1=2997, s2=1998, s3=0, s4=0, s5=1001, s6=1002, s7=0] (stddev=1067.98, mean=999.71, sum=6998)
46-
qps#1: thrash_pct: [s1=8%, s2=8%, s3=0%, s4=0%, s5=5%, s6=6%, s7=0%] (sum=27%)
47-
replica_moves#1: last: [s1=2, s2=1, s3=0, s4=0, s5=1, s6=1, s7=0] (stddev=0.70, mean=0.71, sum=5)
45+
qps#1: last: [s1=996, s2=1001, s3=999, s4=994, s5=1001, s6=1002, s7=1007] (stddev=3.93, mean=1000.00, sum=7000)
46+
qps#1: thrash_pct: [s1=12%, s2=9%, s3=4%, s4=8%, s5=5%, s6=6%, s7=4%] (sum=48%)
47+
replica_moves#1: last: [s1=2, s2=1, s3=0, s4=1, s5=1, s6=1, s7=0] (stddev=0.64, mean=0.86, sum=6)
4848
replica_moves#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%] (sum=0%)
49-
qps#2: last: [s1=2997, s2=2000, s3=0, s4=0, s5=0, s6=2001, s7=0] (stddev=1194.63, mean=999.71, sum=6998)
50-
qps#2: thrash_pct: [s1=7%, s2=7%, s3=0%, s4=0%, s5=0%, s6=11%, s7=0%] (sum=25%)
51-
replica_moves#2: last: [s1=2, s2=1, s3=0, s4=0, s5=0, s6=1, s7=0] (stddev=0.73, mean=0.57, sum=4)
49+
qps#2: last: [s1=999, s2=999, s3=997, s4=998, s5=997, s6=1002, s7=998] (stddev=1.59, mean=998.57, sum=6990)
50+
qps#2: thrash_pct: [s1=11%, s2=8%, s3=4%, s4=103%, s5=4%, s6=105%, s7=5%] (sum=240%)
51+
replica_moves#2: last: [s1=2, s2=1, s3=0, s4=1, s5=0, s6=1, s7=0] (stddev=0.70, mean=0.71, sum=5)
5252
replica_moves#2: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%] (sum=0%)
53-
artifacts[sma-count]: a725af49d5bb0f06
54-
failed assertion sample 1
55-
balance stat=qps threshold=(≤1.15) ticks=6
56-
max/mean=3.00 tick=0
57-
max/mean=3.00 tick=1
58-
max/mean=3.00 tick=2
59-
max/mean=3.00 tick=3
60-
max/mean=3.00 tick=4
61-
max/mean=3.00 tick=5
62-
failed assertion sample 2
63-
balance stat=qps threshold=(≤1.15) ticks=6
64-
max/mean=3.00 tick=0
65-
max/mean=3.00 tick=1
66-
max/mean=3.00 tick=2
67-
max/mean=3.00 tick=3
68-
max/mean=3.00 tick=4
69-
max/mean=3.00 tick=5
53+
artifacts[sma-count]: df2fd03851620873
7054
==========================
7155
qps#1: last: [s1=3998, s2=1998, s3=0, s4=0, s5=0, s6=1002, s7=0] (stddev=1413.41, mean=999.71, sum=6998)
7256
qps#1: thrash_pct: [s1=5%, s2=6%, s3=0%, s4=0%, s5=0%, s6=4%, s7=0%] (sum=15%)
@@ -101,6 +85,7 @@ Cluster Set Up
10185
Key Space
10286
[0,10000): 7(rf=3), 0MiB, [s1:(7,4*),s2:(6,2*),s3:(3,0*),s4:(2,0*),s5:(1,0*),s6:(1,1*),s7:(1,0*)]
10387
Event
88+
set LBRebalancingObjective to 0
10489
set LBRebalancingMode to 2
10590
Workload Set Up
10691
[0,10000): 95%r large-block [128-256B/op, 7000ops/s]
@@ -124,31 +109,15 @@ setting gossip_delay=20s
124109
# thrashing on the fourth sample here.
125110
eval duration=5m samples=2 seed=42 metrics=(qps,replica_moves) cfgs=(sma-count,mma-only)
126111
----
127-
qps#1: last: [s1=2004, s2=0, s3=0, s4=0, s5=3001, s6=0, s7=1994] (stddev=1195.23, mean=999.86, sum=6999)
128-
qps#1: thrash_pct: [s1=115%, s2=3%, s3=0%, s4=0%, s5=8%, s6=55%, s7=8%] (sum=189%)
129-
replica_moves#1: last: [s1=5, s2=4, s3=0, s4=0, s5=4, s6=4, s7=2] (stddev=1.91, mean=2.71, sum=19)
112+
qps#1: last: [s1=994, s2=999, s3=1005, s4=1000, s5=998, s6=1002, s7=999] (stddev=3.16, mean=999.57, sum=6997)
113+
qps#1: thrash_pct: [s1=117%, s2=59%, s3=54%, s4=53%, s5=104%, s6=105%, s7=53%] (sum=543%)
114+
replica_moves#1: last: [s1=5, s2=4, s3=0, s4=4, s5=3, s6=5, s7=2] (stddev=1.67, mean=3.29, sum=23)
130115
replica_moves#1: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%] (sum=0%)
131-
qps#2: last: [s1=1001, s2=0, s3=0, s4=1001, s5=0, s6=3003, s7=1995] (stddev=1069.18, mean=1000.00, sum=7000)
132-
qps#2: thrash_pct: [s1=269%, s2=2%, s3=0%, s4=3%, s5=258%, s6=8%, s7=60%] (sum=600%)
133-
replica_moves#2: last: [s1=11, s2=4, s3=0, s4=0, s5=9, s6=12, s7=3] (stddev=4.69, mean=5.57, sum=39)
116+
qps#2: last: [s1=1001, s2=1003, s3=994, s4=997, s5=1002, s6=1001, s7=998] (stddev=2.97, mean=999.43, sum=6996)
117+
qps#2: thrash_pct: [s1=256%, s2=47%, s3=43%, s4=2%, s5=284%, s6=126%, s7=125%] (sum=882%)
118+
replica_moves#2: last: [s1=12, s2=7, s3=1, s4=0, s5=10, s6=8, s7=3] (stddev=4.26, mean=5.86, sum=41)
134119
replica_moves#2: thrash_pct: [s1=0%, s2=0%, s3=0%, s4=0%, s5=0%, s6=0%, s7=0%] (sum=0%)
135-
artifacts[sma-count]: 83a77bd5552b8bfe
136-
failed assertion sample 1
137-
balance stat=qps threshold=(≤1.15) ticks=6
138-
max/mean=3.00 tick=0
139-
max/mean=3.00 tick=1
140-
max/mean=3.00 tick=2
141-
max/mean=3.00 tick=3
142-
max/mean=3.00 tick=4
143-
max/mean=3.00 tick=5
144-
failed assertion sample 2
145-
balance stat=qps threshold=(≤1.15) ticks=6
146-
max/mean=3.00 tick=0
147-
max/mean=3.00 tick=1
148-
max/mean=3.00 tick=2
149-
max/mean=3.00 tick=3
150-
max/mean=3.00 tick=4
151-
max/mean=3.00 tick=5
120+
artifacts[sma-count]: 1bd16e771629608b
152121
==========================
153122
qps#1: last: [s1=4002, s2=1995, s3=0, s4=0, s5=0, s6=1002, s7=0] (stddev=1414.32, mean=999.86, sum=6999)
154123
qps#1: thrash_pct: [s1=6%, s2=6%, s3=0%, s4=0%, s5=0%, s6=4%, s7=0%] (sum=16%)

0 commit comments

Comments
 (0)