
Commit 03e4181

Add benchmark scripts for Torch MultiQueue benchmarks

1 parent: 2752822

File tree: 4 files changed (+124, −2)

devops/actions/run-tests/benchmark/action.yml

Lines changed: 18 additions & 0 deletions
@@ -166,6 +166,11 @@ runs:
       with:
         ref: ${{ env.BENCHMARK_RESULTS_BRANCH }}
         path: llvm-ci-perf-results
+
+    # - name: Show compute-benchmarks version
+    #   shell: bash
+    #   run: |
+
     - name: Build and run benchmarks
       env:
         # Need to append "_<device>_<backend>" to save name in order to follow
@@ -274,6 +279,19 @@ runs:
           export COMPUTE_BENCHMARKS_BUILD_PATH=$WORKDIR/compute-benchmarks-build
           python3 ./devops/scripts/benchmarks/tests/test_integration.py
         fi
+
+    - name: Show compute-benchmarks version
+      shell: bash
+      run: |
+        echo "Compute-benchmarks version info:"
+        python3 -c "
+        import sys
+        sys.path.append('./devops/scripts/benchmarks')
+        from benches.compute import ComputeBench
+        bench = ComputeBench()
+        print(f'Git hash: {bench.git_hash()}')
+        print(f'Git URL: {bench.git_url()}')
+        "
     - name: Cache changes and upload github summary
       if: always()
       shell: bash
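For local debugging, the same check can be run outside CI. A minimal sketch, assuming the repository root as the working directory and that ComputeBench() really can be constructed with no arguments, as the step above assumes:

    # Local equivalent of the "Show compute-benchmarks version" step above.
    # Assumptions: run from the repo root; ComputeBench() takes no arguments.
    import sys

    sys.path.append("./devops/scripts/benchmarks")

    from benches.compute import ComputeBench

    bench = ComputeBench()
    print(f"Git hash: {bench.git_hash()}")  # pinned compute-benchmarks revision
    print(f"Git URL: {bench.git_url()}")    # repository the suite clones from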

devops/scripts/benchmarks/benches/compute.py

Lines changed: 80 additions & 1 deletion
@@ -62,7 +62,10 @@ def git_url(self) -> str:

     def git_hash(self) -> str:
         # Nov 17, 2025
-        return "932ae79f7cca7e156285fc10a59610927c769e89"
+        git_hash_value = "ec6710ff85cb6bd9232ca67237e782618b4d8382"
+        log.info(f"ComputeBench git hash: {git_hash_value}")
+
+        return git_hash_value

     def setup(self) -> None:
         if options.sycl is None:
@@ -269,6 +272,7 @@ def benchmarks(self) -> list[Benchmark]:
             )
         )

+        # Add RecordAndReplay benchmarks
         record_and_replay_params = product([0, 1], [0, 1])
         for emulate, instantiate in record_and_replay_params:

@@ -315,6 +319,39 @@ def createRrBench(variant_name: str, **kwargs):
             ),
         ]

+        # Add TorchMultiQueue benchmarks
+        for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES):
+
+            def createTorchMultiQueueBench(variant_name: str, **kwargs):
+                return TorchMultiQueue(
+                    self,
+                    runtime,
+                    variant_name,
+                    PROFILERS.TIMER,
+                    **kwargs,
+                )
+
+            benches += [
+                createTorchMultiQueueBench(
+                    "large",
+                    workgroupCount=4096,
+                    workgroupSize=512,
+                    kernelsPerQueue=20,
+                ),
+                createTorchMultiQueueBench(
+                    "medium",
+                    workgroupCount=512,
+                    workgroupSize=256,
+                    kernelsPerQueue=10,
+                ),
+                createTorchMultiQueueBench(
+                    "small",
+                    workgroupCount=256,
+                    workgroupSize=124,
+                    kernelsPerQueue=4,
+                ),
+            ]
+
         # Add UR-specific benchmarks
         benches += [
             # TODO: multithread_benchmark_ur fails with segfault
@@ -735,6 +772,48 @@ def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         return [f"--{k}={v}" for k, v in self._rr_params.items()]


+class TorchMultiQueue(ComputeBenchmark):
+    def __init__(
+        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
+    ):
+        self._variant_name = variant_name
+        self._smq_params = kwargs
+        self._iterations_regular = 1000
+        self._iterations_trace = 10
+        super().__init__(
+            suite,
+            f"torch_benchmark_{runtime.value}",
+            "KernelSubmitMultiQueue",
+            runtime,
+            profiler_type,
+        )
+
+    def name(self):
+        ret = []
+        for k, v in self._smq_params.items():
+            ret.append(f"{k} {v}")
+        ret.sort()
+        return self._bench_name + " " + ", ".join(ret)
+
+    def display_name(self) -> str:
+        return f"{self.explicit_group()} {self._runtime.value}"
+
+    def explicit_group(self):
+        return f"{self._test} {self._variant_name}"
+
+    def get_tags(self):
+        return ["pytorch_" + runtime_to_tag_name(self._runtime)]
+
+    def _supported_runtimes(self) -> list[RUNTIMES]:
+        return super()._supported_runtimes() + [RUNTIMES.SYCL_PREVIEW]
+
+    def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
+        iters = self._get_iters(run_trace)
+        return [f"--iterations={iters}"] + [
+            f"--{k}={v}" for k, v in self._smq_params.items()
+        ]
+
+
 class QueueInOrderMemcpy(ComputeBenchmark):
     def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
         self._is_copy_only = isCopyOnly
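To see how the kwargs above become benchmark names and binary flags, here is a minimal standalone sketch of the string-building in TorchMultiQueue.name() and _bin_args(); the RUNTIMES stub is reduced for illustration, and the parameter values are the "large" variant from this diff:

    # Standalone sketch of the TorchMultiQueue name() and _bin_args() logic;
    # the framework classes are stubbed out, only the string-building remains.
    from enum import Enum


    class RUNTIMES(Enum):  # reduced stub of the real enum
        L0 = "l0"


    params = {"workgroupCount": 4096, "workgroupSize": 512, "kernelsPerQueue": 20}
    bench_name = f"torch_benchmark_{RUNTIMES.L0.value}"

    # name(): bench name plus the sorted "key value" pairs, comma-separated
    print(bench_name + " " + ", ".join(sorted(f"{k} {v}" for k, v in params.items())))
    # -> torch_benchmark_l0 kernelsPerQueue 20, workgroupCount 4096, workgroupSize 512

    # _bin_args(): an --iterations flag plus one --<key>=<value> flag per kwarg
    iterations = 1000  # _iterations_regular for non-traced runs
    print([f"--iterations={iterations}"] + [f"--{k}={v}" for k, v in params.items()])

These are exactly the names asserted in test_integration.py below.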

devops/scripts/benchmarks/main.py

Lines changed: 6 additions & 1 deletion
@@ -108,7 +108,6 @@ def run_iterations(
     """

     for iter in range(iters):
-        log.info(f"running {benchmark.name()}, iteration {iter}... ")
         try:
             bench_results = benchmark.run(
                 env_vars, run_trace=run_trace, force_trace=force_trace
@@ -207,7 +206,9 @@ def process_results(
         if stddev_threshold_override is not None
         else options.stddev_threshold
     )
+    print("Threshold", threshold)
     threshold_scaled = threshold * mean_value
+    print(threshold_scaled)

     if stddev > threshold_scaled:
         log.warning(
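The two added print calls sit inside the stability check, which scales the relative stddev threshold by the mean before comparing. A minimal sketch of that logic with made-up numbers (the rest of process_results is omitted):

    # Illustrative sketch of the stddev check; only the scaling mirrors
    # process_results, the input values are invented.
    import statistics

    values = [10.0, 10.4, 9.8, 10.2]           # per-iteration results
    mean_value = statistics.mean(values)       # 10.1
    stddev = statistics.stdev(values)          # ~0.258

    threshold = 0.02                           # relative threshold, e.g. 2%
    threshold_scaled = threshold * mean_value  # ~0.202, in absolute units

    if stddev > threshold_scaled:              # 0.258 > 0.202 -> flagged unstable
        print(f"unstable: stddev {stddev:.3f} > {threshold_scaled:.3f}")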
@@ -293,6 +294,9 @@ def main(directory, additional_env_vars, compare_names, filter, execution_stats)
     # TODO: add a mode where we fail entire script in case of setup (or other) failures and use in CI

     for s in suites:
+        if isinstance(s, ComputeBench):
+            log.info(f"Benchmarks version - {s.name()}: {s.git_hash()}")
+
         if s.name() not in enabled_suites(options.preset):
             continue

@@ -887,6 +891,7 @@ def validate_and_parse_env_args(env_args):
         execution_stats["warnings"] += 1

     log.info(f"Selected device architecture: {options.device_architecture}")
+    log.info("Benchmarks version")

     main(
         args.benchmark_directory,

devops/scripts/benchmarks/tests/test_integration.py

Lines changed: 20 additions & 0 deletions
@@ -188,6 +188,26 @@ def test_submit_kernel(self):
             {"L0", "latency", "micro", "submit"},
         )

+    def test_torch_l0(self):
+        self._checkCase(
+            "torch_benchmark_l0 kernelsPerQueue 20, workgroupCount 4096, workgroupSize 512",
+            "KernelSubmitMultiQueue large",
+            {"pytorch_L0"},
+        )
+
+    def test_torch_sycl(self):
+        self._checkCase(
+            "torch_benchmark_sycl kernelsPerQueue 10, workgroupCount 512, workgroupSize 256",
+            "KernelSubmitMultiQueue medium",
+            {"pytorch_SYCL"},
+        )
+
+    def test_torch_syclpreview(self):
+        self._checkCase(
+            "torch_benchmark_syclpreview kernelsPerQueue 4, workgroupCount 256, workgroupSize 124",
+            "KernelSubmitMultiQueue small",
+            {"pytorch_SYCL"},
+        )

 if __name__ == "__main__":
     unittest.main()
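_checkCase itself is defined earlier in test_integration.py and is not part of this diff. A hypothetical sketch of its shape, shown only to make the argument order above readable; the helper names here are invented:

    # Hypothetical: (expected benchmark name, expected explicit group,
    # expected subset of tags). The real lookup and assertions may differ.
    def _checkCase(self, name, explicit_group, tags):
        bench = self._benchmarks_by_name[name]  # invented index of suite benches
        self.assertEqual(bench.explicit_group(), explicit_group)
        self.assertTrue(tags.issubset(set(bench.get_tags())))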
