Skip to content

Commit 5700ad6

Browse files
committed
Add CBO parameter to force Shuffle Elimination
1 parent 4ef71a8 commit 5700ad6

File tree

9 files changed

+39
-19
lines changed

9 files changed

+39
-19
lines changed

ydb/core/kqp/opt/logical/kqp_opt_log.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,17 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase {
175175
}
176176

177177
TMaybeNode<TExprBase> OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) {
178-
auto maxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize);
178+
TCBOSettings settings {
179+
.MaxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize),
180+
.ForceShuffleElimination = Config->ForceShuffleElimination.Get().GetOrElse(TDqSettings::TDefault::ForceShuffleElimination)
181+
};
182+
179183
auto optLevel = Config->CostBasedOptimizationLevel.Get().GetOrElse(Config->DefaultCostBasedOptimizationLevel);
180184
bool enableShuffleElimination = KqpCtx.Config->OptShuffleElimination.Get().GetOrElse(KqpCtx.Config->DefaultEnableShuffleElimination);
181185
auto providerCtx = TKqpProviderContext(KqpCtx, optLevel);
182186
auto stats = TypesCtx.GetStats(node.Raw());
183187
TTableAliasMap* tableAliases = stats? stats->TableAliases.Get(): nullptr;
184-
auto opt = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(providerCtx, maxDPhypDPTableSize, ctx, enableShuffleElimination, TypesCtx.OrderingsFSM, tableAliases));
188+
auto opt = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(providerCtx, settings, ctx, enableShuffleElimination, TypesCtx.OrderingsFSM, tableAliases));
185189
TExprBase output = DqOptimizeEquiJoinWithCosts(node, ctx, TypesCtx, optLevel,
186190
*opt, [](auto& rels, auto label, auto node, auto stat) {
187191
rels.emplace_back(std::make_shared<TKqpRelOptimizerNode>(TString(label), *stat, node));

ydb/core/kqp/provider/yql_kikimr_settings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ TKikimrConfiguration::TKikimrConfiguration() {
124124
REGISTER_SETTING(*this, UseBlockReader);
125125

126126
REGISTER_SETTING(*this, MaxDPHypDPTableSize);
127+
REGISTER_SETTING(*this, ForceShuffleElimination);
127128

128129
REGISTER_SETTING(*this, MaxTasksPerStage);
129130
REGISTER_SETTING(*this, DataSizePerPartition);

ydb/core/kqp/provider/yql_kikimr_settings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ struct TKikimrSettings {
9292
NCommon::TConfSetting<NDq::EHashShuffleFuncType , Static> ColumnShardHashShuffleFuncType;
9393

9494
NCommon::TConfSetting<ui32, Static> MaxDPHypDPTableSize;
95+
NCommon::TConfSetting<bool, Static> ForceShuffleElimination;
9596

9697
NCommon::TConfSetting<ui32, Static> MaxTasksPerStage;
9798
NCommon::TConfSetting<ui64, Static> DataSizePerPartition;

ydb/library/yql/dq/opt/dq_opt_join_cbo_factory.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ namespace NYql::NDq {
99
namespace {
1010
class TDqOptimizerFactory : public IOptimizerFactory {
1111
public:
12-
virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerNative(IProviderContext& pctx, TExprContext& ectx, const TNativeSettings& settings) const override {
13-
return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings.MaxDPhypDPTableSize, ectx, false, nullptr));
12+
virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerNative(IProviderContext& pctx, TExprContext& ectx, const TCBOSettings& settings) const override {
13+
return IOptimizerNew::TPtr(MakeNativeOptimizerNew(pctx, settings, ectx, false, nullptr));
1414
}
1515

1616
virtual IOptimizerNew::TPtr MakeJoinCostBasedOptimizerPG(IProviderContext& pctx, TExprContext& ctx, const TPGSettings& settings) const override {

ydb/library/yql/dq/opt/dq_opt_join_cost_based.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -314,14 +314,14 @@ class TOptimizerNativeNew: public IOptimizerNew {
314314
public:
315315
TOptimizerNativeNew(
316316
IProviderContext& ctx,
317-
ui32 maxDPhypDPTableSize,
317+
const TCBOSettings &optimizerSettings,
318318
TExprContext& exprCtx,
319319
bool enableShuffleElimination,
320320
TSimpleSharedPtr<TOrderingsStateMachine> orderingsFSM,
321321
TTableAliasMap* tableAliases
322322
)
323323
: IOptimizerNew(ctx)
324-
, MaxDPHypTableSize_(maxDPhypDPTableSize)
324+
, OptimizerSettings_(optimizerSettings)
325325
, ExprCtx(exprCtx)
326326
, EnableShuffleElimination(enableShuffleElimination && orderingsFSM != nullptr)
327327
, OrderingsFSM(orderingsFSM)
@@ -334,7 +334,7 @@ class TOptimizerNativeNew: public IOptimizerNew {
334334
) override {
335335
auto relsCount = joinTree->Labels().size();
336336

337-
if (EnableShuffleElimination && relsCount <= 14) {
337+
if (EnableShuffleElimination && (relsCount <= 14 || OptimizerSettings_.ForceShuffleElimination)) {
338338
return JoinSearchImpl<TNodeSet64, TDPHypSolverShuffleElimination<TNodeSet64>>(joinTree, false, hints);
339339
} else if (relsCount <= 64) { // The algorithm is more efficient.
340340
return JoinSearchImpl<TNodeSet64, TDPHypSolverClassic<TNodeSet64>>(joinTree, EnableShuffleElimination, hints);
@@ -382,7 +382,7 @@ class TOptimizerNativeNew: public IOptimizerNew {
382382
TJoinHypergraph<TNodeSet> hypergraph = MakeJoinHypergraph<TNodeSet>(joinTree, hints);
383383
TDPHypImpl solver = GetDPHypImpl<TNodeSet, TDPHypImpl>(hypergraph);
384384
YQL_CLOG(TRACE, CoreDq) << "Enumeration algorithm chosen: " << solver.Type();
385-
if (solver.CountCC(MaxDPHypTableSize_) >= MaxDPHypTableSize_) {
385+
if (solver.CountCC(OptimizerSettings_.MaxDPhypDPTableSize) >= OptimizerSettings_.MaxDPhypDPTableSize) {
386386
YQL_CLOG(TRACE, CoreDq) << "Maximum DPhyp threshold exceeded";
387387
ExprCtx.AddWarning(
388388
YqlIssue(
@@ -506,7 +506,7 @@ class TOptimizerNativeNew: public IOptimizerNew {
506506
}
507507

508508
private:
509-
ui32 MaxDPHypTableSize_;
509+
TCBOSettings OptimizerSettings_;
510510
TExprContext& ExprCtx;
511511
bool EnableShuffleElimination;
512512

@@ -516,13 +516,13 @@ class TOptimizerNativeNew: public IOptimizerNew {
516516

517517
IOptimizerNew* MakeNativeOptimizerNew(
518518
IProviderContext& pctx,
519-
const ui32 maxDPhypDPTableSize,
519+
const TCBOSettings &settings,
520520
TExprContext& ectx,
521521
bool enableShuffleElimination,
522522
TSimpleSharedPtr<TOrderingsStateMachine> orderingsFSM,
523523
TTableAliasMap* tableAliases
524524
) {
525-
return new TOptimizerNativeNew(pctx, maxDPhypDPTableSize, ectx, enableShuffleElimination, orderingsFSM, tableAliases);
525+
return new TOptimizerNativeNew(pctx, settings, ectx, enableShuffleElimination, orderingsFSM, tableAliases);
526526
}
527527

528528
void CollectInterestingOrderingsFromJoinTree(

ydb/library/yql/dq/opt/dq_opt_join_cost_based.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ void CollectInterestingOrderingsFromJoinTree(
5050

5151
IOptimizerNew* MakeNativeOptimizerNew(
5252
IProviderContext& ctx,
53-
const ui32 maxDPHypDPTableSize,
53+
const TCBOSettings &settings,
5454
TExprContext& ectx,
5555
bool enableShuffleElimination,
5656
TSimpleSharedPtr<TOrderingsStateMachine> orderingsFSM = nullptr,

ydb/library/yql/dq/opt/ut/dq_cbo_ut.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,17 @@ Y_UNIT_TEST_SUITE(DQCBO) {
3434
Y_UNIT_TEST(Empty) {
3535
TBaseProviderContext pctx;
3636
TExprContext dummyCtx;
37-
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false));
37+
38+
TCBOSettings settings{};
39+
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, settings, dummyCtx, false));
3840
}
3941

4042
Y_UNIT_TEST(JoinSearch2Rels) {
4143
TBaseProviderContext pctx;
4244
TExprContext dummyCtx;
43-
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false));
45+
46+
TCBOSettings settings{};
47+
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, settings, dummyCtx, false));
4448

4549
auto rel1 = std::make_shared<TRelOptimizerNode>(
4650
"a",
@@ -74,7 +78,9 @@ Y_UNIT_TEST(JoinSearch2Rels) {
7478
Y_UNIT_TEST(JoinSearch3Rels) {
7579
TBaseProviderContext pctx;
7680
TExprContext dummyCtx;
77-
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false));
81+
82+
TCBOSettings settings{};
83+
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, settings, dummyCtx, false));
7884

7985
auto rel1 = std::make_shared<TRelOptimizerNode>("a",
8086
TOptimizerStatistics(BaseTable, 100000, 1, 0, 1000000));
@@ -121,7 +127,9 @@ Y_UNIT_TEST(JoinSearchYQL19363) {
121127
// Verify that JoinSearch() correctly handles dot and comma characters.
122128
TBaseProviderContext pctx;
123129
TExprContext dummyCtx;
124-
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false));
130+
131+
TCBOSettings settings{};
132+
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, settings, dummyCtx, false));
125133

126134
TString relName1 = "a,b.c";
127135
TString colName1 = "a.x";
@@ -228,7 +236,9 @@ struct TMockProviderContextYT24403 : public TBaseProviderContext {
228236
Y_UNIT_TEST(JoinSearchYT24403) {
229237
TMockProviderContextYT24403 pctx;
230238
TExprContext dummyCtx;
231-
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, 100000, dummyCtx, false));
239+
240+
TCBOSettings settings{};
241+
std::unique_ptr<IOptimizerNew> optimizer = std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(pctx, settings, dummyCtx, false));
232242

233243
const TString relName1 = "a";
234244
const TString relName2 = "b";
@@ -368,7 +378,8 @@ Y_UNIT_TEST(DqOptimizeEquiJoinWithCostsNative) {
368378
TExprContext ctx;
369379
TBaseProviderContext pctx;
370380
std::function<IOptimizerNew*()> optFactory = [&]() {
371-
return MakeNativeOptimizerNew(pctx, 100000, ctx, false);
381+
TCBOSettings settings{};
382+
return MakeNativeOptimizerNew(pctx, settings, ctx, false);
372383
};
373384
_DqOptimizeEquiJoinWithCosts(optFactory, ctx);
374385
}

ydb/library/yql/dq/opt/ut/dq_opt_hypergraph_ut.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@ template <typename TProviderContext = TTestContext>
5252
std::shared_ptr<IBaseOptimizerNode> Enumerate(const std::shared_ptr<IBaseOptimizerNode>& root, const TOptimizerHints& hints = {}) {
5353
auto ctx = TProviderContext();
5454
TExprContext dummyCtx;
55+
56+
TCBOSettings settings{};
5557
auto optimizer =
56-
std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(ctx, std::numeric_limits<ui32>::max(), dummyCtx, false));
58+
std::unique_ptr<IOptimizerNew>(MakeNativeOptimizerNew(ctx, settings, dummyCtx, false));
5759

5860
Y_ENSURE(root->Kind == EOptimizerNodeKind::JoinNodeType);
5961
auto res = optimizer->JoinSearch(std::static_pointer_cast<TJoinOptimizerNode>(root), hints);

ydb/library/yql/providers/dq/common/yql_dq_settings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ struct TDqSettings {
6464
static constexpr ESpillingEngine SpillingEngine = ESpillingEngine::Disable;
6565
static constexpr ui32 CostBasedOptimizationLevel = 4;
6666
static constexpr ui32 MaxDPHypDPTableSize = 95'000U;
67+
static constexpr bool ForceShuffleElimination = false;
6768
static constexpr ui64 MaxAttachmentsSize = 2_GB;
6869
static constexpr bool SplitStageOnDqReplicate = true;
6970
static constexpr ui64 EnableSpillingNodes = 0;

0 commit comments

Comments
 (0)