Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
f04b23b
Initial draft of cursor port
djns99 Nov 18, 2025
0d346d7
Properly cache jit compiled module
djns99 Nov 19, 2025
4624721
Cleanup prints
djns99 Nov 19, 2025
87b8b56
Combine tests run without crashing
djns99 Nov 19, 2025
0151b7a
Update tests with fake_moe properly
djns99 Nov 19, 2025
2d79642
Clear MOE workspace before each run
djns99 Nov 19, 2025
8dcbca1
Cleanup MPI processes on test failures
djns99 Nov 20, 2025
9733bd0
More exit handling for rank failures
djns99 Nov 20, 2025
671c66d
Cleaner test implementation
djns99 Nov 20, 2025
63b8538
Update MNNVL config setup
djns99 Nov 26, 2025
202ced2
Update test to get ep size from MPI
djns99 Nov 26, 2025
9490c5f
Update tests with better test bounds
djns99 Nov 26, 2025
06ada51
Fix timeout logic
djns99 Nov 26, 2025
377e719
Disable python steps for MPI tests
djns99 Nov 26, 2025
e6058eb
Standardise API name to match existing code better
djns99 Nov 27, 2025
6950cd2
Enhance tests and add convenience APIs for more general usage
djns99 Nov 27, 2025
59ed9ab
Fix existing dispatch tests
djns99 Nov 27, 2025
3338587
Tests for sanitize and combine
djns99 Nov 28, 2025
ae9fd7a
Fix logic for inplace combine workspace setup
djns99 Nov 28, 2025
1abf2b2
Limit num tokens to allow combine to successfully run on 1 GPU
djns99 Nov 28, 2025
6ba34fd
Unify naming
djns99 Nov 28, 2025
6fe85b9
Add test for payload not in the workspace and fix coderabbit comments
djns99 Nov 28, 2025
8e9801d
Update comm.rst
djns99 Nov 28, 2025
732e94c
Fix coderabbit nits
djns99 Nov 28, 2025
f019122
Properly export all functions
djns99 Nov 28, 2025
de11030
Add A2A single GPU tests to CI
djns99 Dec 3, 2025
ad067f5
Remove internal API from docs
djns99 Dec 3, 2025
d08f7fa
Align workspace calculation to match TRT-LLM style
djns99 Dec 4, 2025
f99d8ba
Add checks for MNNVL support for tests that use the API
djns99 Dec 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 34 additions & 6 deletions csrc/nv_internal/cpp/common/envUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -222,11 +222,6 @@ bool getEnvDisaggLayerwise() {
return disaggLayerwise;
}

// Reports the TRTLLM_PARALLEL_CACHE_SEND flag.
// The environment is queried on the first call only; later calls return the cached value.
bool getEnvParallelCacheSend() {
  static bool const kParallelCacheSend = getBoolEnv("TRTLLM_PARALLEL_CACHE_SEND");
  return kParallelCacheSend;
}

bool getEnvRequestKVCacheConcurrent() {
static bool const requestKVCacheConcurrent = getBoolEnv("TRTLLM_REQUEST_KV_CACHE_CONCURRENT");
return requestKVCacheConcurrent;
Expand Down Expand Up @@ -277,7 +272,7 @@ size_t getEnvAllReduceWorkspaceSize() {
return workspaceSize;
}

std::string getEnvKVCacheTransferOutputPath() {
// Returns the value of TRTLLM_KVCACHE_TIME_OUTPUT_PATH, or an empty string when getStrEnv
// yields no value. The string is resolved on the first call and stored in a function-local
// static, so the returned reference stays valid for the rest of the program.
std::string const& getEnvKVCacheTimeOutputPath() {
  static std::string cachedPath = getStrEnv("TRTLLM_KVCACHE_TIME_OUTPUT_PATH").value_or("");
  return cachedPath;
}
Expand Down Expand Up @@ -328,4 +323,37 @@ uint16_t getEnvNixlPort() {

// Reports the TRTLLM_DISAGG_BENCHMARK_GEN_ONLY flag.
// Not cached: getBoolEnv is invoked on every call.
bool getEnvDisaggBenchmarkGenOnly() {
  bool const genOnly = getBoolEnv("TRTLLM_DISAGG_BENCHMARK_GEN_ONLY");
  return genOnly;
}

// Reports whether MoE A2A kernels should use one block per token, as configured through
// TLLM_MOE_A2A_ONE_BLOCK_PER_TOKEN. Enabled by default: the result is false only when
// getIntEnv yields the integer 0.
bool getEnvMoeA2AOneBlockPerToken() {
  // Looked up once; the optional is empty when getIntEnv provides no value.
  static std::optional<int32_t> const kSetting = getIntEnv("TLLM_MOE_A2A_ONE_BLOCK_PER_TOKEN");
  // An absent setting is treated like any non-zero value, i.e. "enabled".
  return kSetting.value_or(1) != 0;
}

// Normalizes a requested block size (threads per block) for kernel launches.
//
//   - unset or non-positive value -> 256 (the default)
//   - value above 1024            -> 1024
//   - any other value             -> rounded UP to the next multiple of 32 (warp size)
//
// The result is therefore always a multiple of 32 in the range [32, 1024].
static int sanitizeBlockSize(std::optional<int32_t> const& val) {
  constexpr int kDefaultBlockSize = 256;
  constexpr int kMaxBlockSize = 1024;
  constexpr int kWarpSize = 32;

  int const requested = val.value_or(kDefaultBlockSize);
  if (requested <= 0) {
    // Invalid request: fall back to the default, which is already warp-aligned.
    return kDefaultBlockSize;
  }

  int const clamped = requested > kMaxBlockSize ? kMaxBlockSize : requested;
  // Round up (not to nearest) to a whole number of warps. Since clamped >= 1 the result is at
  // least kWarpSize, and since kMaxBlockSize is itself a multiple of kWarpSize the result never
  // exceeds kMaxBlockSize, so no further zero/overflow check is needed.
  return (clamped + kWarpSize - 1) / kWarpSize * kWarpSize;
}

// Block size (threads per block) for MoE A2A Dispatch kernels, taken from
// TLLM_MOE_A2A_DISPATCH_BLOCK_SIZE and passed through sanitizeBlockSize.
// Computed on the first call and cached afterwards.
int getEnvMoeA2ADispatchBlockSize() {
  static int const kDispatchBlockSize = [] {
    auto const requested = getIntEnv("TLLM_MOE_A2A_DISPATCH_BLOCK_SIZE");
    return sanitizeBlockSize(requested);
  }();
  return kDispatchBlockSize;
}

// Block size (threads per block) for MoE A2A Combine kernels, taken from
// TLLM_MOE_A2A_COMBINE_BLOCK_SIZE and passed through sanitizeBlockSize.
// Computed on the first call and cached afterwards.
int getEnvMoeA2ACombineBlockSize() {
  static int const kCombineBlockSize = [] {
    auto const requested = getIntEnv("TLLM_MOE_A2A_COMBINE_BLOCK_SIZE");
    return sanitizeBlockSize(requested);
  }();
  return kCombineBlockSize;
}

// Reports the TRTLLM_EPLB_FORCE_GDRCOPY flag.
// Not cached: getBoolEnv is invoked on every call.
bool getEnvEplbForceGdrcopy() {
  bool const forceGdrcopy = getBoolEnv("TRTLLM_EPLB_FORCE_GDRCOPY");
  return forceGdrcopy;
}

} // namespace tensorrt_llm::common
11 changes: 10 additions & 1 deletion csrc/nv_internal/tensorrt_llm/common/envUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ bool getEnvDisableKVCacheTransferOverlap();

bool getEnvEnableReceiveKVCacheParallel();

std::string getEnvKVCacheTransferOutputPath();
std::string const& getEnvKVCacheTimeOutputPath();

bool getEnvTryZCopyForKVCacheTransfer();

Expand Down Expand Up @@ -92,4 +92,13 @@ size_t getEnvKVCacheSendMaxConcurrenceNum();

size_t getEnvMemSizeForKVCacheTransferBuffer();

// Whether to use one block per token for MoE A2A kernels (default true).
// Controlled by TLLM_MOE_A2A_ONE_BLOCK_PER_TOKEN: returns true when no integer value is
// available for the variable, otherwise true iff that value is non-zero.
bool getEnvMoeA2AOneBlockPerToken();

// TODO: Temporary, for development purposes.
// Block size (threads per block) for MoE A2A Dispatch kernels, read from
// TLLM_MOE_A2A_DISPATCH_BLOCK_SIZE. Defaults to 256 when unset or non-positive; values above
// 1024 are clamped to 1024, and the result is rounded up to a multiple of 32.
int getEnvMoeA2ADispatchBlockSize();
// Block size (threads per block) for MoE A2A Combine kernels, read from
// TLLM_MOE_A2A_COMBINE_BLOCK_SIZE. Defaults to 256 when unset or non-positive; values above
// 1024 are clamped to 1024, and the result is rounded up to a multiple of 32.
int getEnvMoeA2ACombineBlockSize();

} // namespace tensorrt_llm::common
Loading