From 66912fea6b77514b93678f611012f62adccaa715 Mon Sep 17 00:00:00 2001
From: vporyadke
Date: Wed, 5 Nov 2025 14:31:24 +0100
Subject: [PATCH 1/2] fix stuck reassign actor (#28195)

---
 ydb/core/mind/hive/monitoring.cpp | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp
index ca1830c75cd9..9248554b1027 100644
--- a/ydb/core/mind/hive/monitoring.cpp
+++ b/ydb/core/mind/hive/monitoring.cpp
@@ -2880,7 +2880,7 @@ class TTxMonEvent_RebalanceFromScratch : public TTransactionBase<THive> {
 class TReassignTabletWaitActor : public TActor<TReassignTabletWaitActor>, public ISubActor {
 public:
     TActorId Source;
-    ui32 TabletsTotal = std::numeric_limits<ui32>::max();
+    ui32 TabletsTotal = 0;
     ui32 TabletsDone = 0;
     THive* Hive;
 
@@ -2907,14 +2907,23 @@ class TReassignTabletWaitActor : public TActor<TReassignTabletWaitActor>, public
         return SelfId().LocalId();
     }
 
-    void Handle(TEvPrivate::TEvRestartComplete::TPtr&) {
-        ++TabletsDone;
+    void AddTablet(TLeaderTabletInfo* tablet) {
+        tablet->ActorsToNotifyOnRestart.push_back(SelfId());
+        ++TabletsTotal;
+    }
+
+    void CheckCompletion() {
         if (TabletsDone >= TabletsTotal) {
             Send(Source, new NMon::TEvRemoteJsonInfoRes(TStringBuilder() << "{\"total\":" << TabletsDone << "}"));
             PassAway();
         }
     }
 
+    void Handle(TEvPrivate::TEvRestartComplete::TPtr&) {
+        ++TabletsDone;
+        CheckCompletion();
+    }
+
     STATEFN(StateWork) {
         switch (ev->GetTypeRewrite()) {
             cFunc(TEvents::TSystem::PoisonPill, PassAway);
@@ -3042,12 +3051,12 @@ class TTxMonEvent_ReassignTablet : public TTransactionBase<THive> {
                 continue;
             }
             if (Wait) {
-                tablet->ActorsToNotifyOnRestart.emplace_back(waitActorId); // volatile settings, will not persist upon restart
+                waitActor->AddTablet(tablet);
             }
             operations.emplace_back(new TEvHive::TEvReassignTablet(tablet->Id, channels, forcedGroupIds, Async));
         }
         if (Wait) {
-            waitActor->TabletsTotal = operations.size();
+            waitActor->CheckCompletion();
         }
         for (auto& op : operations) {
             ctx.Send(Self->SelfId(), op.Release());

From 3c2e809f2b0d1ccf0edf5830ab38f7fb83f9c3e7 Mon Sep 17 00:00:00 2001
From: vporyadke
Date: Wed, 12 Nov 2025 16:30:28 +0300
Subject: [PATCH 2/2] test for invalid reassign (#28404)

---
 ydb/core/mind/hive/hive_ut.cpp | 63 ++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp
index 46ddba9dad96..56f5b1364ce6 100644
--- a/ydb/core/mind/hive/hive_ut.cpp
+++ b/ydb/core/mind/hive/hive_ut.cpp
@@ -8229,6 +8229,69 @@ Y_UNIT_TEST_SUITE(THiveTest) {
         }
     }
 
+    Y_UNIT_TEST(TestReassignNonexistentTablet) {
+        TTestBasicRuntime runtime(1, false);
+        Setup(runtime, true);
+
+        const ui64 hiveTablet = MakeDefaultHiveID();
+        const ui64 testerTablet = MakeTabletID(false, 1);
+        const TActorId hiveActor = CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive);
+        runtime.EnableScheduleForActor(hiveActor);
+        MakeSureTabletIsUp(runtime, hiveTablet, 0);
+        TActorId sender = runtime.AllocateEdgeActor(0);
+
+        {
+            TDispatchOptions options;
+            options.FinalEvents.emplace_back(TEvLocal::EvSyncTablets);
+            runtime.DispatchEvents(options);
+        }
+
+        THolder<TEvHive::TEvCreateTablet> createTablet = MakeHolder<TEvHive::TEvCreateTablet>(testerTablet, 1, TTabletTypes::Dummy, BINDED_CHANNELS);
+        ui64 tablet = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(createTablet), 0, true);
+
+        MakeSureTabletIsUp(runtime, tablet, 0);
+
+        {
+            NActorsProto::TRemoteHttpInfo pb;
+            pb.SetMethod(HTTP_METHOD_POST);
+            pb.SetPath("/app");
+            auto* p1 = pb.AddQueryParams();
+            p1->SetKey("TabletID");
+            p1->SetValue(TStringBuilder() << hiveTablet);
+            auto* p2 = pb.AddQueryParams();
+            p2->SetKey("page");
+            p2->SetValue("ReassignTablet");
+            auto* p3 = pb.AddQueryParams();
+            p3->SetKey("tablet");
+            p3->SetValue("52");
+            runtime.SendToPipe(hiveTablet, sender, new NMon::TEvRemoteHttpInfo(std::move(pb)), 0, GetPipeConfigWithRetries());
+
+            TAutoPtr<IEventHandle> handle;
+            auto resp = runtime.GrabEdgeEventRethrow<NMon::TEvRemoteJsonInfoRes>(handle);
+            Ctest << "Hive response: " << resp->Json << Endl;
+            NJson::TJsonValue value;
+            ReadJsonTree(resp->Json, &value, false);
+            UNIT_ASSERT_VALUES_EQUAL(value["total"].GetIntegerSafe(), 0);
+        }
+
+        // this must not block balancer
+
+        {
+            THolder<TEvHive::TEvTabletMetrics> metrics = MakeHolder<TEvHive::TEvTabletMetrics>();
+            NKikimrHive::TTabletMetrics* metric = metrics->Record.AddTabletMetrics();
+            metric->SetTabletID(tablet);
+            metric->MutableResourceUsage()->SetNetwork(9000);
+
+            runtime.SendToPipe(hiveTablet, sender, metrics.Release());
+        }
+
+        {
+            TDispatchOptions options;
+            options.FinalEvents.push_back(NHive::TEvPrivate::EvBalancerOut);
+            runtime.DispatchEvents(options);
+        }
+    }
+
     Y_UNIT_TEST(TestTabletsStartingCounter) {
         TTestBasicRuntime runtime(1, false);
         Setup(runtime, true);