Skip to content

Commit 070b55b

Browse files
alexpanimanqyryq
authored and committed
Introduce a flexible framework for topology-based robust join order optimizer benchmarks (ydb-platform#27065)
1 parent b26f7d1 commit 070b55b

File tree

13 files changed

+2697
-5
lines changed

13 files changed

+2697
-5
lines changed

ydb/core/kqp/opt/logical/kqp_opt_log.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase {
175175
}
176176

177177
TMaybeNode<TExprBase> OptimizeEquiJoinWithCosts(TExprBase node, TExprContext& ctx) {
178-
TCBOSettings settings {
178+
TCBOSettings settings{
179179
.MaxDPhypDPTableSize = Config->MaxDPHypDPTableSize.Get().GetOrElse(TDqSettings::TDefault::MaxDPHypDPTableSize),
180180
.ShuffleEliminationJoinNumCutoff = Config->ShuffleEliminationJoinNumCutoff.Get().GetOrElse(TDqSettings::TDefault::ShuffleEliminationJoinNumCutoff)
181181
};
Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,266 @@
1+
#pragma once
2+
3+
#include <algorithm>
#include <cassert>
#include <cctype>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <limits>
#include <map>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <type_traits>
12+
13+
namespace NKikimr::NKqp {
14+
15+
// Parses a compact argument string of the form "key=value;key=value;..." and
// exposes the values either as raw strings or as typed, optionally ranged values.
//
// Supported value syntaxes for typed access (GetArg / GetArgOrDefault):
//   "5"             - a single fixed value
//   "1..100"        - an inclusive range with the default step of 1
//   "0.1,0.2..1.0"  - an inclusive range whose step is (second - first)
//
// Supported TValue types: double, uint64_t, int64_t, std::string (always a
// single fixed value) and std::chrono::nanoseconds (parsed from strings such
// as "10ms" or "1.5s" and exposed as a uint64_t count of nanoseconds).
class TArgs {
public:
    // Forward iterator over the values of a TRangedValue.
    // The iterator is exhausted once Current_ reaches the sentinel End_.
    template <typename TValue>
    class TRangedValueIter {
    public:
        TRangedValueIter(TValue current, TValue end, TValue step)
            : Current_(current)
            , End_(end)
            , Step_(step)
        {
        }

        TValue operator*() const {
            return Current_;
        }

        TRangedValueIter& operator++() {
            Current_ += Step_;
            // Clamp to the sentinel so that comparison with end() succeeds even
            // when the step does not evenly divide the range.
            if (Current_ >= End_) {
                Current_ = End_;
            }

            return *this;
        }

        bool operator!=(TRangedValueIter other) const {
            // Only iterators produced by the same range are comparable.
            assert(Step_ == other.Step_);
            return Current_ != other.Current_;
        }

    private:
        TValue Current_;
        TValue End_;
        TValue Step_;
    };

    // Either a single value or an inclusive range [From_, To_] walked with Step_.
    // Usable directly in a range-based for loop.
    template <typename TValue>
    class TRangedValue {
    public:
        TRangedValue(TValue from, TValue to, TValue step)
            : IsRange_(true)
            , From_(from)
            , To_(to)
            , Step_(step)
        {
        }

        TRangedValue(TValue from)
            : IsRange_(false)
            , From_(from)
            , To_(from)
            , Step_(DefaultStep())
        {
        }

        // True when the value was constructed from explicit range bounds.
        bool IsRange() const {
            return IsRange_;
        }

        TRangedValueIter<TValue> end() const {
            const TValue sentinel = EndSentinel();
            return TRangedValueIter<TValue>{sentinel, sentinel, Step_};
        }

        TRangedValueIter<TValue> begin() const {
            return TRangedValueIter<TValue>{From_, EndSentinel(), Step_};
        }

        // For a fixed value returns that value; for a range returns its first element.
        TValue GetValue() const {
            return From_;
        }

        TValue GetFirst() const {
            return From_;
        }

        TValue GetLast() const {
            return To_;
        }

        TValue GetStep() const {
            return Step_;
        }

    private:
        // Step used for single values: 1 for arithmetic types, a default-constructed
        // value otherwise (non-arithmetic values, e.g. strings, are never iterated by step).
        static TValue DefaultStep() {
            if constexpr (std::is_arithmetic_v<TValue>) {
                return static_cast<TValue>(1);
            } else {
                return TValue{};
            }
        }

        // Value used as the "one past the last" marker.
        // For integers this is To_ + 1. For floating point, To_ + 1 would let the
        // iteration run past To_ whenever the step is smaller than 1, so the
        // sentinel sits just above To_ instead: a tiny fraction of the step absorbs
        // accumulated rounding error, and it is never less than the next
        // representable value after To_ so that To_ itself is still visited.
        TValue EndSentinel() const {
            if constexpr (std::is_floating_point_v<TValue>) {
                const TValue tolerant = To_ + Step_ * static_cast<TValue>(1e-9);
                const TValue next = std::nextafter(To_, std::numeric_limits<TValue>::infinity());
                return std::max(tolerant, next);
            } else {
                return To_ + 1; // immediately after the last
            }
        }

    private:
        bool IsRange_;

        TValue From_;
        TValue To_;
        TValue Step_;
    };

public:
    // Parses `input` as a ';'-separated list of "key=value" entries.
    TArgs(std::string input)
        : Values_(ParseMap(input))
    {
    }

    // Returns the raw string stored for `key`.
    // Throws std::out_of_range when the key was not provided.
    std::string GetString(const std::string& key) const {
        const auto it = Values_.find(key);
        if (it == Values_.end()) {
            throw std::out_of_range("arg not provided: '" + key + "'");
        }
        return it->second;
    }

    // Returns the raw string stored for `key`, or `defaultValue` when absent.
    std::string GetStringOrDefault(const std::string& key, std::string defaultValue) const {
        const auto it = Values_.find(key);
        if (it != Values_.end()) {
            return it->second;
        }

        return defaultValue;
    }

    // Returns the value of `key` parsed as a (possibly ranged) TValue.
    // Throws std::out_of_range when the key is absent and std::invalid_argument
    // when the value is malformed.
    template <typename TValue>
    auto GetArg(const std::string& key) const {
        return ParseRangedValue<TValue>(GetString(key));
    }

    // Same as GetArg, but parses `defaultSerialized` when the key is absent.
    template <typename TValue>
    auto GetArgOrDefault(const std::string& key, const std::string& defaultSerialized) const {
        if (HasArg(key)) {
            return GetArg<TValue>(key);
        }
        return ParseRangedValue<TValue>(defaultSerialized);
    }

    bool HasArg(const std::string& key) const {
        return Values_.find(key) != Values_.end();
    }

private:
    std::map<std::string, std::string> Values_;

private:
    // Dependent "false" so that the static_assert below fires only when the
    // offending branch is actually instantiated.
    template <typename>
    static constexpr bool AlwaysFalse = false;

    static void LTrim(std::string& input) {
        input.erase(input.begin(), std::find_if(input.begin(), input.end(), [](unsigned char ch) {
            return !std::isspace(ch);
        }));
    }

    static void RTrim(std::string& input) {
        input.erase(std::find_if(input.rbegin(), input.rend(), [](unsigned char ch) {
            return !std::isspace(ch);
        }).base(), input.end());
    }

    static void Trim(std::string& input) {
        LTrim(input);
        RTrim(input);
    }

    // Splits `input` by `delimiter` into "key=value" entries.
    // Whitespace around both the key and the value is ignored; entries without
    // '=' are skipped; a later duplicate key overrides an earlier one.
    static std::map<std::string, std::string> ParseMap(const std::string& input, char delimiter = ';') {
        std::map<std::string, std::string> result;
        std::stringstream ss(input);

        std::string entry;
        while (std::getline(ss, entry, delimiter)) {
            // each entry looks like key value pair, e.g. "N=5"
            const size_t pos = entry.find('=');
            if (pos == std::string::npos) {
                continue;
            }

            std::string key = entry.substr(0, pos);
            std::string value = entry.substr(pos + 1);
            // Trim both sides so that "N = 5" is stored under "N", not "N ".
            Trim(key);
            Trim(value);
            result[std::move(key)] = std::move(value);
        }

        return result;
    }

    // Parses "<number><unit>" where unit is one of ns, us, ms, s, m, h.
    // Fractional numbers are honoured ("1.5s" is 1'500'000'000ns); the result is
    // rounded to the nearest nanosecond.
    // Throws std::invalid_argument on malformed input and std::out_of_range when
    // the duration does not fit into std::chrono::nanoseconds.
    static std::chrono::nanoseconds ParseDuration(const std::string& input) {
        // Compiled once: constructing std::regex is expensive.
        static const std::regex pattern(R"((\d+(?:\.\d+)?)\s*(ns|us|ms|s|m|h))");

        std::smatch match;
        if (!std::regex_match(input, match, pattern)) {
            throw std::invalid_argument("Invalid duration format");
        }

        const double value = std::stod(match[1].str());
        const std::string unit = match[2].str();

        double nanosPerUnit = 0.0;
        if (unit == "ns") {
            nanosPerUnit = 1.0;
        } else if (unit == "us") {
            nanosPerUnit = 1e3;
        } else if (unit == "ms") {
            nanosPerUnit = 1e6;
        } else if (unit == "s") {
            nanosPerUnit = 1e9;
        } else if (unit == "m") {
            nanosPerUnit = 60.0 * 1e9;
        } else if (unit == "h") {
            nanosPerUnit = 3600.0 * 1e9;
        } else {
            throw std::invalid_argument("Unknown unit");
        }

        // Scale in floating point first so the fractional part is not truncated
        // before the unit conversion.
        using TRep = std::chrono::nanoseconds::rep;
        const double nanos = value * nanosPerUnit;
        if (!(nanos < static_cast<double>(std::numeric_limits<TRep>::max()))) {
            throw std::out_of_range("Duration is too large");
        }

        return std::chrono::nanoseconds(static_cast<TRep>(std::llround(nanos)));
    }

    // Parses a single scalar of the requested type.
    // std::chrono::nanoseconds is returned as a uint64_t nanosecond count.
    template <typename TValue>
    static auto ParseValue(const std::string& input) {
        if constexpr (std::is_same_v<TValue, double>) {
            return std::stod(input);
        } else if constexpr (std::is_same_v<TValue, uint64_t>) {
            return static_cast<uint64_t>(std::stoull(input));
        } else if constexpr (std::is_same_v<TValue, int64_t>) {
            return static_cast<int64_t>(std::stoll(input));
        } else if constexpr (std::is_same_v<TValue, std::string>) {
            return input;
        } else if constexpr (std::is_same_v<TValue, std::chrono::nanoseconds>) {
            return static_cast<uint64_t>(ParseDuration(input).count());
        } else {
            static_assert(AlwaysFalse<TValue>, "Unhandled type");
        }
    }

    // Parses either a fixed value or an inclusive range (see the class comment
    // for the syntax). Non-arithmetic values (strings) are always treated as a
    // fixed value, since ".." may legitimately occur inside them.
    // Throws std::invalid_argument when the step is not positive or when the
    // range start is greater than the range end.
    template <typename TValue>
    static auto ParseRangedValue(const std::string& input) {
        using TParsed = decltype(ParseValue<TValue>(input));

        if constexpr (!std::is_arithmetic_v<TParsed>) {
            return TRangedValue<TParsed>{ParseValue<TValue>(input)};
        } else {
            // Check if it contains ".."
            const size_t dotdot = input.find("..");
            if (dotdot == std::string::npos) {
                // parse fixed value
                return TRangedValue<TParsed>{ParseValue<TValue>(input)};
            }

            // parse ranged (with step or without)
            const TParsed to = ParseValue<TValue>(input.substr(dotdot + 2));
            const size_t comma = input.find(',');

            TParsed first{};
            TParsed step = static_cast<TParsed>(1); // default step
            if (comma != std::string::npos && comma < dotdot) {
                // parse ranges like "0.1,0.2..1.0"
                first = ParseValue<TValue>(input.substr(0, comma));
                const TParsed second = ParseValue<TValue>(input.substr(comma + 1, dotdot - comma - 1));
                // A zero or negative step would never reach the end of the range
                // (and would wrap around for unsigned types).
                if (!(second > first)) {
                    throw std::invalid_argument("range step must be positive: '" + input + "'");
                }
                step = second - first;
            } else {
                // parse ranges like "1..100"
                first = ParseValue<TValue>(input.substr(0, dotdot));
            }

            if (first > to) {
                throw std::invalid_argument("range start is greater than range end: '" + input + "'");
            }

            return TRangedValue<TParsed>{first, to, step};
        }
    }
};
265+
266+
} // namespace NKikimr::NKqp

0 commit comments

Comments
 (0)