Skip to content

Commit 14d1702

Browse files
committed
Support custom log dir
Signed-off-by: Kaiyu Xie <26294424+kaiyux@users.noreply.github.com>
1 parent 923f6b1 commit 14d1702

File tree

1 file changed

+26
-18
lines changed
  • examples/disaggregated/slurm/benchmark

1 file changed

+26
-18
lines changed

examples/disaggregated/slurm/benchmark/submit.py

Lines changed: 26 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
import shutil
77
import subprocess
88
import sys
9+
from datetime import datetime
910

1011
import yaml
1112

@@ -22,6 +23,10 @@ def parse_args():
2223
'--dir',
2324
type=str,
2425
help='Directory containing YAML configuration files')
26+
group.add_argument('--log-dir',
27+
type=str,
28+
default=None,
29+
help='Log directory')
2530
return parser.parse_args()
2631

2732

@@ -45,7 +50,7 @@ def calculate_nodes(world_size, num_servers, gpus_per_node):
4550
return (world_size + gpus_per_node - 1) // gpus_per_node * num_servers
4651

4752

48-
def submit_job(config):
53+
def submit_job(config, log_dir):
4954
# Extract configurations
5055
slurm_config = config['slurm']
5156
hw_config = config['hardware']
@@ -101,25 +106,28 @@ def submit_job(config):
101106
gen_enable_attention_dp = config['worker_config']['gen'][
102107
'enable_attention_dp']
103108

104-
# Create base log directory path
105-
log_base = os.path.join(env_config['work_dir'], f"{isl}-{osl}")
109+
if log_dir is None:
110+
# Create base log directory path
111+
date_prefix = datetime.now().strftime("%Y%m%d")
112+
log_base = os.path.join(env_config['work_dir'], f"{date_prefix}/{isl}-{osl}")
106113

107-
# Get eplb num_slots for gen worker
108-
load_balancer_config = config['worker_config']['gen'].get(
109-
'moe_config', {}).get('load_balancer', {})
110-
if isinstance(load_balancer_config, str):
111-
with open(load_balancer_config, 'r') as f:
112-
load_balancer_config = yaml.safe_load(f)
113-
eplb_num_slots = load_balancer_config.get('num_slots', 0)
114+
# Get eplb num_slots for gen worker
115+
load_balancer_config = config['worker_config']['gen'].get(
116+
'moe_config', {}).get('load_balancer', {})
117+
if isinstance(load_balancer_config, str):
118+
with open(load_balancer_config, 'r') as f:
119+
load_balancer_config = yaml.safe_load(f)
120+
eplb_num_slots = load_balancer_config.get('num_slots', 0)
114121

115-
# Determine directory suffix based on attention_dp
116-
if gen_enable_attention_dp:
117-
dir_suffix = f"ctx{ctx_num}_gen{gen_num}_dep{gen_tp_size}_batch{gen_batch_size}_eplb{eplb_num_slots}_mtp{mtp_size}"
118-
else:
119-
dir_suffix = f"ctx{ctx_num}_gen{gen_num}_tep{gen_tp_size}_batch{gen_batch_size}_eplb{eplb_num_slots}_mtp{mtp_size}"
122+
# Determine directory suffix based on attention_dp
123+
if gen_enable_attention_dp:
124+
dir_suffix = f"ctx{ctx_num}_gen{gen_num}_dep{gen_tp_size}_batch{gen_batch_size}_eplb{eplb_num_slots}_mtp{mtp_size}"
125+
else:
126+
dir_suffix = f"ctx{ctx_num}_gen{gen_num}_tep{gen_tp_size}_batch{gen_batch_size}_eplb{eplb_num_slots}_mtp{mtp_size}"
127+
128+
# Create full log directory path
129+
log_dir = os.path.join(log_base, dir_suffix)
120130

121-
# Create full log directory path
122-
log_dir = os.path.join(log_base, dir_suffix)
123131
# Remove existing directory if it exists
124132
if os.path.exists(log_dir):
125133
shutil.rmtree(log_dir)
@@ -231,7 +239,7 @@ def main():
231239
print(f"\nProcessing: {config_file}")
232240
try:
233241
config = load_config(config_file)
234-
submit_job(config)
242+
submit_job(config, args.log_dir)
235243
print(f"Successfully submitted job for: {config_file}")
236244
except Exception as e:
237245
print(f"Error processing {config_file}: {e}", file=sys.stderr)

0 commit comments

Comments
 (0)