Skip to content

Commit 64fc022

Browse files
committed
Polish
Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
1 parent 15a3231 commit 64fc022

File tree

1 file changed

+9
-11
lines changed

1 file changed

+9
-11
lines changed

examples/disaggregated/slurm/benchmark/start_worker.sh

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -40,29 +40,27 @@ fi
4040

4141
echo "config_file: ${config_file}"
4242

43-
# save the hostname to a file
44-
45-
# if SLURM_NODEID is 0
43+
# if SLURM_NODEID is 0, save the hostname to a file
4644
if [ "${SLURM_NODEID}" = "0" ]; then
4745
mkdir -p ${log_dir}/hostnames/
4846
echo $(hostname) > ${log_dir}/hostnames/${role}_${instance_id}.txt
4947
echo "hostname saved to ${log_dir}/hostnames/${role}_${instance_id}.txt"
5048
fi
5149

52-
#check if nsys is enabled
50+
nsys_prefix=""
5351
if [ "${enable_nsys}" != "true" ]; then
5452
echo "nsys is not enabled, start normal flow"
55-
trtllm-llmapi-launch ${numa_bind_cmd} trtllm-serve ${model_path} --host $(hostname) --port ${port} --extra_llm_api_options ${config_file}
5653
else
57-
nsys_prefix=""
5854
nsys_file=${log_dir}/nsys_worker_proc_${role}_${instance_id}_${SLURM_PROCID}
5955
export TLLM_PROFILE_RECORD_GC=1
6056
export TLLM_NVTX_DEBUG=1
61-
nsys_prefix="nsys profile -e \"NSYS_MPI_STORE_TEAMS_PER_RANK=1\" -o ${nsys_file} -f true -t cuda,nvtx,python-gil -c cudaProfilerApi --cuda-graph-trace node --capture-range-end=stop --gpu-metrics-devices=none"
57+
export NSYS_MPI_STORE_TEAMS_PER_RANK=1
6258
export TLLM_PROFILE_START_STOP=${profile_range}
6359
echo "nsys is enabled on ${role} GPUs, TLLM_PROFILE_START_STOP=${profile_range}"
64-
${nsys_prefix} trtllm-llmapi-launch ${numa_bind_cmd} \
65-
trtllm-serve ${model_path} \
66-
--host $(hostname) --port ${port} \
67-
--extra_llm_api_options ${config_file}
60+
nsys_prefix="nsys profile -o ${nsys_file} -f true -t cuda,nvtx,python-gil -c cudaProfilerApi --cuda-graph-trace node --capture-range-end=stop --gpu-metrics-devices=none"
6861
fi
62+
63+
${nsys_prefix} trtllm-llmapi-launch ${numa_bind_cmd} \
64+
trtllm-serve ${model_path} \
65+
--host $(hostname) --port ${port} \
66+
--extra_llm_api_options ${config_file}

0 commit comments

Comments
 (0)