Skip to content

Commit 6c9b104

Browse files
committed
[ICRDMA] Support for XDC transver via RDMA. EXT-1506 (#28557)
Co-authored-by: Robert Drynkin <robdrynkin@ydb.tech> Co-authored-by: Daniil Cherednik <dcherednik@ydb.tech> Initial RDMA support for XDC. Conflicts: ydb/library/actors/core/event_pb.h ydb/library/actors/interconnect/ya.make
1 parent af4768c commit 6c9b104

30 files changed

+1280
-96
lines changed

ydb/library/actors/core/event.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ namespace NActors {
3939
}
4040
virtual ui32 Type() const = 0;
4141
virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0;
42+
virtual std::optional<TRope> SerializeToRope(std::function<TRcBuf(ui32 size)>) const {
43+
return std::nullopt;
44+
}
4245
virtual bool IsSerializable() const = 0;
4346
virtual ui32 CalculateSerializedSizeCached() const {
4447
return CalculateSerializedSize();

ydb/library/actors/core/event_load.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ namespace NActors {
2525
size_t Tailroom = 0; // tailroom for the chunk
2626
size_t Alignment = 0; // required alignment
2727
bool IsInline = false; // if true, goes through ordinary channel
28+
bool IsRdmaCapable = false; // if true, could go through RDMA
2829
};
2930

3031
struct TEventSerializationInfo {

ydb/library/actors/core/event_pb.cpp

Lines changed: 103 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "event_pb.h"
22

3+
#include <ydb/library/actors/interconnect/rdma/mem_pool.h>
4+
#include <ydb/library/actors/protos/interconnect.pb.h>
5+
36
namespace NActors {
47
TString EventPBBaseToString(const TString& header, const TString& dbgStr) {
58
TString res;
@@ -259,26 +262,9 @@ namespace NActors {
259262
return res;
260263
}
261264

262-
bool SerializeToArcadiaStreamImpl(TChunkSerializer* chunker, const TVector<TRope> &payload) {
263-
// serialize payload first
265+
template<typename TCb>
266+
bool SerializeHeaderCommon(const TVector<TRope>& payload, TCb& append) {
264267
if (payload) {
265-
void *data;
266-
int size = 0;
267-
auto append = [&](const char *p, size_t len) {
268-
while (len) {
269-
if (size) {
270-
const size_t numBytesToCopy = std::min<size_t>(size, len);
271-
memcpy(data, p, numBytesToCopy);
272-
data = static_cast<char*>(data) + numBytesToCopy;
273-
size -= numBytesToCopy;
274-
p += numBytesToCopy;
275-
len -= numBytesToCopy;
276-
} else if (!chunker->Next(&data, &size)) {
277-
return false;
278-
}
279-
}
280-
return true;
281-
};
282268
auto appendNumber = [&](size_t number) {
283269
char buf[MaxNumberBytes];
284270
return append(buf, SerializeNumber(number, buf));
@@ -294,19 +280,86 @@ namespace NActors {
294280
return false;
295281
}
296282
}
297-
if (size) {
298-
chunker->BackUp(std::exchange(size, 0));
283+
}
284+
285+
return true;
286+
}
287+
288+
bool SerializePayloadCommon(const TVector<TRope> &payload, std::function<bool(TRope)> append) {
289+
for (const TRope& rope : payload) {
290+
if (!append(rope)) {
291+
return false;
299292
}
300-
for (const TRope& rope : payload) {
301-
if (!chunker->WriteRope(&rope)) {
293+
}
294+
return true;
295+
}
296+
297+
bool SerializeToArcadiaStreamImpl(TChunkSerializer* chunker, const TVector<TRope> &payload) {
298+
// serialize payload first
299+
void *data;
300+
int size = 0;
301+
auto append = [&](const char *p, size_t len) {
302+
while (len) {
303+
if (size) {
304+
const size_t numBytesToCopy = std::min<size_t>(size, len);
305+
memcpy(data, p, numBytesToCopy);
306+
data = static_cast<char*>(data) + numBytesToCopy;
307+
size -= numBytesToCopy;
308+
p += numBytesToCopy;
309+
len -= numBytesToCopy;
310+
} else if (!chunker->Next(&data, &size)) {
302311
return false;
303312
}
304313
}
314+
return true;
315+
};
316+
if (!SerializeHeaderCommon(payload, append)) {
317+
return false;
318+
}
319+
if (size) {
320+
chunker->BackUp(std::exchange(size, 0));
321+
}
322+
323+
auto appendRope = [&](TRope rope) {
324+
if (!chunker->WriteRope(&rope)) {
325+
return false;
326+
}
327+
return true;
328+
};
329+
if (!SerializePayloadCommon(payload, appendRope)) {
330+
return false;
305331
}
306332

307333
return true;
308334
}
309335

336+
std::optional<TRope> SerializeToRopeImpl(std::function<TRcBuf(ui32 size)> alloc, const TVector<TRope> &payload) {
337+
TRope result;
338+
auto sz = CalculateSerializedHeaderSizeImpl(payload);
339+
if (!sz) {
340+
return result;
341+
}
342+
TRcBuf headerBuf = alloc(sz);
343+
if (!headerBuf) {
344+
return {};
345+
}
346+
char* data = headerBuf.GetDataMut();
347+
auto append = [&data](const char *p, size_t len) {
348+
std::memcpy(data, p, len);
349+
data += len;
350+
return true;
351+
};
352+
SerializeHeaderCommon(payload, append);
353+
result.Insert(result.End(), headerBuf);
354+
355+
auto appendRope = [&](TRope rope) {
356+
result.Insert(result.End(), std::move(rope));
357+
return true;
358+
};
359+
SerializePayloadCommon(payload, appendRope);
360+
361+
return result;
362+
}
310363

311364
void ParseExtendedFormatPayload(TRope::TConstIterator &iter, size_t &size, TVector<TRope> &payload, size_t &totalPayloadSize)
312365
{
@@ -356,23 +409,41 @@ namespace NActors {
356409
}
357410
}
358411

359-
ui32 CalculateSerializedSizeImpl(const TVector<TRope> &payload, ssize_t recordSize) {
360-
ssize_t result = recordSize;
361-
if (result >= 0 && payload) {
412+
ui32 CalculateSerializedHeaderSizeImpl(const TVector<TRope> &payload) {
413+
ui32 result = 0;
414+
if (payload) {
362415
++result; // marker
363416
char buf[MaxNumberBytes];
364417
result += SerializeNumber(payload.size(), buf);
365-
size_t totalPayloadSize = 0;
366418
for (const TRope& rope : payload) {
367419
size_t ropeSize = rope.GetSize();
368-
totalPayloadSize += ropeSize;
369420
result += SerializeNumber(ropeSize, buf);
370421
}
371-
result += totalPayloadSize;
372422
}
373423
return result;
374424
}
375425

426+
ui32 CalculateSerializedSizeImpl(const TVector<TRope> &payload, ssize_t recordSize) {
427+
ssize_t result = recordSize;
428+
if (result >= 0 && payload) {
429+
result += CalculateSerializedHeaderSizeImpl(payload);
430+
for (const TRope& rope : payload) {
431+
result += rope.GetSize();
432+
}
433+
}
434+
return result;
435+
}
436+
437+
bool IsRdma(const TRope &rope) {
438+
for (auto it = rope.Begin(); it != rope.End(); ++it) {
439+
const TRcBuf& chunk = it.GetChunk();
440+
if (NInterconnect::NRdma::TryExtractFromRcBuf(chunk).Empty()) {
441+
return false;
442+
}
443+
}
444+
return true;
445+
}
446+
376447
TEventSerializationInfo CreateSerializationInfoImpl(size_t preserializedSize, bool allowExternalDataChannel, const TVector<TRope> &payload, ssize_t recordSize) {
377448
TEventSerializationInfo info;
378449
info.IsExtendedFormat = static_cast<bool>(payload);
@@ -384,14 +455,14 @@ namespace NActors {
384455
for (const TRope& rope : payload) {
385456
headerLen += SerializeNumber(rope.size(), temp);
386457
}
387-
info.Sections.push_back(TEventSectionInfo{0, headerLen, 0, 0, true});
458+
info.Sections.push_back(TEventSectionInfo{0, headerLen, 0, 0, true, true});
388459
for (const TRope& rope : payload) {
389-
info.Sections.push_back(TEventSectionInfo{0, rope.size(), 0, 0, false});
460+
info.Sections.push_back(TEventSectionInfo{0, rope.size(), 0, 0, false, IsRdma(rope)});
390461
}
391462
}
392463

393464
const size_t byteSize = Max<ssize_t>(0, recordSize) + preserializedSize;
394-
info.Sections.push_back(TEventSectionInfo{0, byteSize, 0, 0, true}); // protobuf itself
465+
info.Sections.push_back(TEventSectionInfo{0, byteSize, 0, 0, true, true}); // protobuf itself
395466

396467
#ifndef NDEBUG
397468
size_t total = 0;

ydb/library/actors/core/event_pb.h

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ namespace NActors {
8282
~TCoroutineChunkSerializer();
8383

8484
void SetSerializingEvent(const IEventBase *event);
85+
void DiscardEvent() { Event = nullptr; };
8586
void Abort();
8687
std::span<TChunk> FeedBuf(void* data, size_t size);
8788
bool IsComplete() const {
@@ -150,6 +151,8 @@ namespace NActors {
150151

151152
void ParseExtendedFormatPayload(TRope::TConstIterator &iter, size_t &size, TVector<TRope> &payload, size_t &totalPayloadSize);
152153
bool SerializeToArcadiaStreamImpl(TChunkSerializer* chunker, const TVector<TRope> &payload);
154+
ui32 CalculateSerializedHeaderSizeImpl(const TVector<TRope> &payload);
155+
std::optional<TRope> SerializeToRopeImpl(std::function<TRcBuf(ui32 size)> alloc, const TVector<TRope> &payload);
153156
ui32 CalculateSerializedSizeImpl(const TVector<TRope> &payload, ssize_t recordSize);
154157
TEventSerializationInfo CreateSerializationInfoImpl(size_t preserializedSize, bool allowExternalDataChannel, const TVector<TRope> &payload, ssize_t recordSize);
155158

@@ -202,8 +205,25 @@ namespace NActors {
202205
return CalculateSerializedSizeImpl(Payload, Record.ByteSize());
203206
}
204207

205-
static IEventBase* Load(TEventSerializedData *input) {
206-
THolder<TEventPBBase> ev(new TEv());
208+
std::optional<TRope> SerializeToRope(std::function<TRcBuf(ui32 size)> alloc) const override {
209+
std::optional<TRope> result = SerializeToRopeImpl(alloc, Payload);
210+
if (!result) {
211+
return {};
212+
}
213+
ui32 size = Record.ByteSizeLong();
214+
TRcBuf recordsSerializedBuf = alloc(size);
215+
if (!recordsSerializedBuf) {
216+
return {};
217+
}
218+
bool serializationDone = Record.SerializePartialToArray(recordsSerializedBuf.GetDataMut(), size);
219+
Y_ABORT_UNLESS(serializationDone);
220+
result->Insert(result->End(), std::move(recordsSerializedBuf));
221+
return result;
222+
}
223+
224+
static TEv* Load(const TEventSerializedData *input) {
225+
THolder<TEv> holder(new TEv());
226+
TEventPBBase* ev = holder.Get();
207227
if (!input->GetSize()) {
208228
Y_PROTOBUF_SUPPRESS_NODISCARD ev->Record.ParseFromString(TString());
209229
} else {
@@ -221,7 +241,7 @@ namespace NActors {
221241
}
222242
}
223243
ev->CachedByteSize = input->GetSize();
224-
return ev.Release();
244+
return holder.Release();
225245
}
226246

227247
size_t GetCachedByteSize() const {

ydb/library/actors/interconnect/channel_scheduler.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ namespace NActors {
1414
std::shared_ptr<IInterconnectMetrics> Metrics;
1515
const ui32 MaxSerializedEventSize;
1616
const TSessionParams Params;
17+
std::shared_ptr<NInterconnect::NRdma::IMemPool> RdmaMemPool;
1718

1819
struct THeapItem {
1920
TEventOutputChannel *Channel;
@@ -29,11 +30,12 @@ namespace NActors {
2930
public:
3031
TChannelScheduler(ui32 peerNodeId, const TChannelsConfig& predefinedChannels,
3132
std::shared_ptr<IInterconnectMetrics> metrics, ui32 maxSerializedEventSize,
32-
TSessionParams params)
33+
TSessionParams params, std::shared_ptr<NInterconnect::NRdma::IMemPool> rdmaMemPool)
3334
: PeerNodeId(peerNodeId)
3435
, Metrics(std::move(metrics))
3536
, MaxSerializedEventSize(maxSerializedEventSize)
3637
, Params(std::move(params))
38+
, RdmaMemPool(std::move(rdmaMemPool))
3739
{
3840
for (const auto& item : predefinedChannels) {
3941
GetOutputChannel(item.first);
@@ -71,15 +73,15 @@ namespace NActors {
7173
auto& res = ChannelArray[channel];
7274
if (Y_UNLIKELY(!res)) {
7375
res.emplace(channel, PeerNodeId, MaxSerializedEventSize, Metrics,
74-
Params);
76+
Params, RdmaMemPool);
7577
}
7678
return *res;
7779
} else {
7880
auto it = ChannelMap.find(channel);
7981
if (Y_UNLIKELY(it == ChannelMap.end())) {
8082
it = ChannelMap.emplace(std::piecewise_construct, std::forward_as_tuple(channel),
8183
std::forward_as_tuple(channel, PeerNodeId, MaxSerializedEventSize,
82-
Metrics, Params)).first;
84+
Metrics, Params, RdmaMemPool)).first;
8385
}
8486
return it->second;
8587
}

0 commit comments

Comments
 (0)