Skip to content

Commit 1d07d89

Browse files
dcherednikqyryq
authored andcommitted
[ICRDMA] Support for XDC transver via RDMA. EXT-1506 (ydb-platform#28557)
Co-authored-by: Robert Drynkin <robdrynkin@ydb.tech> Co-authored-by: Daniil Cherednik <dcherednik@ydb.tech> Initial RDMA support for XDC.
1 parent 6b301fe commit 1d07d89

30 files changed

+1271
-90
lines changed

ydb/library/actors/core/event.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ namespace NActors {
3333
}
3434
virtual ui32 Type() const = 0;
3535
virtual bool SerializeToArcadiaStream(TChunkSerializer*) const = 0;
36+
virtual std::optional<TRope> SerializeToRope(std::function<TRcBuf(ui32 size)>) const {
37+
return std::nullopt;
38+
}
3639
virtual bool IsSerializable() const = 0;
3740
virtual ui32 CalculateSerializedSizeCached() const {
3841
return CalculateSerializedSize();

ydb/library/actors/core/event_load.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ namespace NActors {
2525
size_t Tailroom = 0; // tailroom for the chunk
2626
size_t Alignment = 0; // required alignment
2727
bool IsInline = false; // if true, goes through ordinary channel
28+
bool IsRdmaCapable = false; // if true, could go through RDMA
2829
};
2930

3031
struct TEventSerializationInfo {

ydb/library/actors/core/event_pb.cpp

Lines changed: 103 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#include "event_pb.h"
22

3+
#include <ydb/library/actors/interconnect/rdma/mem_pool.h>
4+
#include <ydb/library/actors/protos/interconnect.pb.h>
5+
36
namespace NActors {
47
TString EventPBBaseToString(const TString& header, const TString& dbgStr) {
58
TString res;
@@ -264,26 +267,9 @@ namespace NActors {
264267
return res;
265268
}
266269

267-
bool SerializeToArcadiaStreamImpl(TChunkSerializer* chunker, const TVector<TRope> &payload) {
268-
// serialize payload first
270+
template<typename TCb>
271+
bool SerializeHeaderCommon(const TVector<TRope>& payload, TCb& append) {
269272
if (payload) {
270-
void *data;
271-
int size = 0;
272-
auto append = [&](const char *p, size_t len) {
273-
while (len) {
274-
if (size) {
275-
const size_t numBytesToCopy = std::min<size_t>(size, len);
276-
memcpy(data, p, numBytesToCopy);
277-
data = static_cast<char*>(data) + numBytesToCopy;
278-
size -= numBytesToCopy;
279-
p += numBytesToCopy;
280-
len -= numBytesToCopy;
281-
} else if (!chunker->Next(&data, &size)) {
282-
return false;
283-
}
284-
}
285-
return true;
286-
};
287273
auto appendNumber = [&](size_t number) {
288274
char buf[MaxNumberBytes];
289275
return append(buf, SerializeNumber(number, buf));
@@ -299,19 +285,86 @@ namespace NActors {
299285
return false;
300286
}
301287
}
302-
if (size) {
303-
chunker->BackUp(std::exchange(size, 0));
288+
}
289+
290+
return true;
291+
}
292+
293+
bool SerializePayloadCommon(const TVector<TRope> &payload, std::function<bool(TRope)> append) {
294+
for (const TRope& rope : payload) {
295+
if (!append(rope)) {
296+
return false;
304297
}
305-
for (const TRope& rope : payload) {
306-
if (!chunker->WriteRope(&rope)) {
298+
}
299+
return true;
300+
}
301+
302+
bool SerializeToArcadiaStreamImpl(TChunkSerializer* chunker, const TVector<TRope> &payload) {
303+
// serialize payload first
304+
void *data;
305+
int size = 0;
306+
auto append = [&](const char *p, size_t len) {
307+
while (len) {
308+
if (size) {
309+
const size_t numBytesToCopy = std::min<size_t>(size, len);
310+
memcpy(data, p, numBytesToCopy);
311+
data = static_cast<char*>(data) + numBytesToCopy;
312+
size -= numBytesToCopy;
313+
p += numBytesToCopy;
314+
len -= numBytesToCopy;
315+
} else if (!chunker->Next(&data, &size)) {
307316
return false;
308317
}
309318
}
319+
return true;
320+
};
321+
if (!SerializeHeaderCommon(payload, append)) {
322+
return false;
323+
}
324+
if (size) {
325+
chunker->BackUp(std::exchange(size, 0));
326+
}
327+
328+
auto appendRope = [&](TRope rope) {
329+
if (!chunker->WriteRope(&rope)) {
330+
return false;
331+
}
332+
return true;
333+
};
334+
if (!SerializePayloadCommon(payload, appendRope)) {
335+
return false;
310336
}
311337

312338
return true;
313339
}
314340

341+
std::optional<TRope> SerializeToRopeImpl(std::function<TRcBuf(ui32 size)> alloc, const TVector<TRope> &payload) {
342+
TRope result;
343+
auto sz = CalculateSerializedHeaderSizeImpl(payload);
344+
if (!sz) {
345+
return result;
346+
}
347+
TRcBuf headerBuf = alloc(sz);
348+
if (!headerBuf) {
349+
return {};
350+
}
351+
char* data = headerBuf.GetDataMut();
352+
auto append = [&data](const char *p, size_t len) {
353+
std::memcpy(data, p, len);
354+
data += len;
355+
return true;
356+
};
357+
SerializeHeaderCommon(payload, append);
358+
result.Insert(result.End(), headerBuf);
359+
360+
auto appendRope = [&](TRope rope) {
361+
result.Insert(result.End(), std::move(rope));
362+
return true;
363+
};
364+
SerializePayloadCommon(payload, appendRope);
365+
366+
return result;
367+
}
315368

316369
void ParseExtendedFormatPayload(TRope::TConstIterator &iter, size_t &size, TVector<TRope> &payload, size_t &totalPayloadSize)
317370
{
@@ -361,23 +414,41 @@ namespace NActors {
361414
}
362415
}
363416

364-
ui32 CalculateSerializedSizeImpl(const TVector<TRope> &payload, ssize_t recordSize) {
365-
ssize_t result = recordSize;
366-
if (result >= 0 && payload) {
417+
ui32 CalculateSerializedHeaderSizeImpl(const TVector<TRope> &payload) {
418+
ui32 result = 0;
419+
if (payload) {
367420
++result; // marker
368421
char buf[MaxNumberBytes];
369422
result += SerializeNumber(payload.size(), buf);
370-
size_t totalPayloadSize = 0;
371423
for (const TRope& rope : payload) {
372424
size_t ropeSize = rope.GetSize();
373-
totalPayloadSize += ropeSize;
374425
result += SerializeNumber(ropeSize, buf);
375426
}
376-
result += totalPayloadSize;
377427
}
378428
return result;
379429
}
380430

431+
ui32 CalculateSerializedSizeImpl(const TVector<TRope> &payload, ssize_t recordSize) {
432+
ssize_t result = recordSize;
433+
if (result >= 0 && payload) {
434+
result += CalculateSerializedHeaderSizeImpl(payload);
435+
for (const TRope& rope : payload) {
436+
result += rope.GetSize();
437+
}
438+
}
439+
return result;
440+
}
441+
442+
bool IsRdma(const TRope &rope) {
443+
for (auto it = rope.Begin(); it != rope.End(); ++it) {
444+
const TRcBuf& chunk = it.GetChunk();
445+
if (NInterconnect::NRdma::TryExtractFromRcBuf(chunk).Empty()) {
446+
return false;
447+
}
448+
}
449+
return true;
450+
}
451+
381452
TEventSerializationInfo CreateSerializationInfoImpl(size_t preserializedSize, bool allowExternalDataChannel, const TVector<TRope> &payload, ssize_t recordSize) {
382453
TEventSerializationInfo info;
383454
info.IsExtendedFormat = static_cast<bool>(payload);
@@ -389,14 +460,14 @@ namespace NActors {
389460
for (const TRope& rope : payload) {
390461
headerLen += SerializeNumber(rope.size(), temp);
391462
}
392-
info.Sections.push_back(TEventSectionInfo{0, headerLen, 0, 0, true});
463+
info.Sections.push_back(TEventSectionInfo{0, headerLen, 0, 0, true, true});
393464
for (const TRope& rope : payload) {
394-
info.Sections.push_back(TEventSectionInfo{0, rope.size(), 0, 0, false});
465+
info.Sections.push_back(TEventSectionInfo{0, rope.size(), 0, 0, false, IsRdma(rope)});
395466
}
396467
}
397468

398469
const size_t byteSize = Max<ssize_t>(0, recordSize) + preserializedSize;
399-
info.Sections.push_back(TEventSectionInfo{0, byteSize, 0, 0, true}); // protobuf itself
470+
info.Sections.push_back(TEventSectionInfo{0, byteSize, 0, 0, true, true}); // protobuf itself
400471

401472
#ifndef NDEBUG
402473
size_t total = 0;

ydb/library/actors/core/event_pb.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ namespace NActors {
8282
~TCoroutineChunkSerializer();
8383

8484
void SetSerializingEvent(const IEventBase *event);
85+
void DiscardEvent() { Event = nullptr; };
8586
void Abort();
8687
std::span<TChunk> FeedBuf(void* data, size_t size);
8788
bool IsComplete() const {
@@ -150,6 +151,8 @@ namespace NActors {
150151

151152
void ParseExtendedFormatPayload(TRope::TConstIterator &iter, size_t &size, TVector<TRope> &payload, size_t &totalPayloadSize);
152153
bool SerializeToArcadiaStreamImpl(TChunkSerializer* chunker, const TVector<TRope> &payload);
154+
ui32 CalculateSerializedHeaderSizeImpl(const TVector<TRope> &payload);
155+
std::optional<TRope> SerializeToRopeImpl(std::function<TRcBuf(ui32 size)> alloc, const TVector<TRope> &payload);
153156
ui32 CalculateSerializedSizeImpl(const TVector<TRope> &payload, ssize_t recordSize);
154157
TEventSerializationInfo CreateSerializationInfoImpl(size_t preserializedSize, bool allowExternalDataChannel, const TVector<TRope> &payload, ssize_t recordSize);
155158

@@ -202,6 +205,22 @@ namespace NActors {
202205
return CalculateSerializedSizeImpl(Payload, Record.ByteSize());
203206
}
204207

208+
std::optional<TRope> SerializeToRope(std::function<TRcBuf(ui32 size)> alloc) const override {
209+
std::optional<TRope> result = SerializeToRopeImpl(alloc, Payload);
210+
if (!result) {
211+
return {};
212+
}
213+
ui32 size = Record.ByteSizeLong();
214+
TRcBuf recordsSerializedBuf = alloc(size);
215+
if (!recordsSerializedBuf) {
216+
return {};
217+
}
218+
bool serializationDone = Record.SerializePartialToArray(recordsSerializedBuf.GetDataMut(), size);
219+
Y_ABORT_UNLESS(serializationDone);
220+
result->Insert(result->End(), std::move(recordsSerializedBuf));
221+
return result;
222+
}
223+
205224
static TEv* Load(const TEventSerializedData *input) {
206225
THolder<TEv> holder(new TEv());
207226
TEventPBBase* ev = holder.Get();

ydb/library/actors/interconnect/channel_scheduler.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ namespace NActors {
1414
std::shared_ptr<IInterconnectMetrics> Metrics;
1515
const ui32 MaxSerializedEventSize;
1616
const TSessionParams Params;
17+
std::shared_ptr<NInterconnect::NRdma::IMemPool> RdmaMemPool;
1718

1819
struct THeapItem {
1920
TEventOutputChannel *Channel;
@@ -29,11 +30,12 @@ namespace NActors {
2930
public:
3031
TChannelScheduler(ui32 peerNodeId, const TChannelsConfig& predefinedChannels,
3132
std::shared_ptr<IInterconnectMetrics> metrics, ui32 maxSerializedEventSize,
32-
TSessionParams params)
33+
TSessionParams params, std::shared_ptr<NInterconnect::NRdma::IMemPool> rdmaMemPool)
3334
: PeerNodeId(peerNodeId)
3435
, Metrics(std::move(metrics))
3536
, MaxSerializedEventSize(maxSerializedEventSize)
3637
, Params(std::move(params))
38+
, RdmaMemPool(std::move(rdmaMemPool))
3739
{
3840
for (const auto& item : predefinedChannels) {
3941
GetOutputChannel(item.first);
@@ -71,15 +73,15 @@ namespace NActors {
7173
auto& res = ChannelArray[channel];
7274
if (Y_UNLIKELY(!res)) {
7375
res.emplace(channel, PeerNodeId, MaxSerializedEventSize, Metrics,
74-
Params);
76+
Params, RdmaMemPool);
7577
}
7678
return *res;
7779
} else {
7880
auto it = ChannelMap.find(channel);
7981
if (Y_UNLIKELY(it == ChannelMap.end())) {
8082
it = ChannelMap.emplace(std::piecewise_construct, std::forward_as_tuple(channel),
8183
std::forward_as_tuple(channel, PeerNodeId, MaxSerializedEventSize,
82-
Metrics, Params)).first;
84+
Metrics, Params, RdmaMemPool)).first;
8385
}
8486
return it->second;
8587
}

0 commit comments

Comments
 (0)