Skip to content

Commit 7e5023c

Browse files
Add benchmarks scripts for Torch MultiQueue benchmarks
1 parent ceae49b commit 7e5023c

File tree

2 files changed

+94
-0
lines changed

2 files changed

+94
-0
lines changed

devops/scripts/benchmarks/benches/compute.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,38 @@ def createRrBench(variant_name: str, **kwargs):
315315
),
316316
]
317317

318+
for runtime in RUNTIMES:
319+
if runtime != RUNTIMES.UR:
320+
321+
def createTorchMultiQueueBench(variant_name: str, **kwargs):
    """Create a ``TorchMultiQueue`` benchmark for the current ``runtime``.

    ``self`` and ``runtime`` come from the enclosing scope, and the
    profiler is always ``PROFILERS.TIMER``.  All keyword arguments are
    forwarded unchanged to ``TorchMultiQueue``.
    """
    fixed_args = (self, runtime, variant_name, PROFILERS.TIMER)
    return TorchMultiQueue(*fixed_args, **kwargs)
329+
330+
benches += [
331+
createTorchMultiQueueBench(
332+
"large",
333+
workgroupCount=4096,
334+
workgroupSize=512,
335+
kernelsPerQueue=20,
336+
),
337+
createTorchMultiQueueBench(
338+
"medium",
339+
workgroupCount=512,
340+
workgroupSize=256,
341+
kernelsPerQueue=10,
342+
),
343+
createTorchMultiQueueBench(
344+
"small",
345+
workgroupCount=256,
346+
workgroupSize=124,
347+
kernelsPerQueue=4,
348+
),
349+
]
318350
# Add UR-specific benchmarks
319351
benches += [
320352
# TODO: multithread_benchmark_ur fails with segfault
@@ -735,6 +767,48 @@ def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
735767
return [f"--{k}={v}" for k, v in self._rr_params.items()]
736768

737769

770+
class TorchMultiQueue(ComputeBenchmark):
771+
def __init__(
772+
self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
773+
):
774+
self._variant_name = variant_name
775+
self._smq_params = kwargs
776+
self._iterations_regular = 1000
777+
self._iterations_trace = 10
778+
super().__init__(
779+
suite,
780+
f"torch_benchmark_{runtime.value}",
781+
"KernelSubmitMultiQueue",
782+
runtime,
783+
profiler_type,
784+
)
785+
786+
def explicit_group(self):
787+
return f"{self._test} {self._variant_name}"
788+
789+
def display_name(self) -> str:
790+
return f"{self.explicit_group()}_{self._runtime.value}"
791+
792+
def get_tags(self):
793+
return [runtime_to_tag_name(self._runtime)]
794+
795+
def name(self):
796+
ret = []
797+
for k, v in self._smq_params.items():
798+
ret.append(f"{k} {v}")
799+
ret.sort()
800+
return self._bench_name + " " + ", ".join(ret)
801+
802+
def _supported_runtimes(self) -> list[RUNTIMES]:
803+
return super()._supported_runtimes() + [RUNTIMES.SYCL_PREVIEW]
804+
805+
def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
806+
iters = self._get_iters(run_trace)
807+
return [f"--iterations={iters}"] + [
808+
f"--{k}={v}" for k, v in self._smq_params.items()
809+
]
810+
811+
738812
class QueueInOrderMemcpy(ComputeBenchmark):
739813
def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
740814
self._is_copy_only = isCopyOnly

devops/scripts/benchmarks/tests/test_integration.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,26 @@ def test_submit_kernel(self):
188188
{"L0", "latency", "micro", "submit"},
189189
)
190190

191+
def test_torch_l0(self):
    """Check the "large" KernelSubmitMultiQueue case for the L0 runtime."""
    # The three values are passed positionally to ``_checkCase``; they look
    # like benchmark name, explicit group and tag set -- confirm against
    # the helper's signature.
    expected_name = "torch_benchmark_l0 kernelsPerQueue 20, workgroupCount 4096, workgroupSize 512"
    expected_group = "KernelSubmitMultiQueue large"
    expected_tags = {"L0"}
    self._checkCase(expected_name, expected_group, expected_tags)
197+
198+
def test_torch_sycl(self):
    """Check the "large" KernelSubmitMultiQueue case for the SYCL runtime."""
    # The three values are passed positionally to ``_checkCase``; they look
    # like benchmark name, explicit group and tag set -- confirm against
    # the helper's signature.
    expected_name = "torch_benchmark_sycl kernelsPerQueue 20, workgroupCount 4096, workgroupSize 512"
    expected_group = "KernelSubmitMultiQueue large"
    expected_tags = {"SYCL"}
    self._checkCase(expected_name, expected_group, expected_tags)
204+
205+
def test_torch_syclpreview(self):
    """Check the "large" KernelSubmitMultiQueue case for the SYCL preview runtime."""
    # The three values are passed positionally to ``_checkCase``; they look
    # like benchmark name, explicit group and tag set -- confirm against
    # the helper's signature.  The expected tag is "SYCL", not a
    # preview-specific tag.
    expected_name = "torch_benchmark_syclpreview kernelsPerQueue 20, workgroupCount 4096, workgroupSize 512"
    expected_group = "KernelSubmitMultiQueue large"
    expected_tags = {"SYCL"}
    self._checkCase(expected_name, expected_group, expected_tags)
191211

192212
if __name__ == "__main__":
193213
unittest.main()

0 commit comments

Comments
 (0)