@@ -62,7 +62,7 @@ def git_url(self) -> str:
 
     def git_hash(self) -> str:
         # Nov 17, 2025
-        return "932ae79f7cca7e156285fc10a59610927c769e89"
+        return "ec6710ff85cb6bd9232ca67237e782618b4d8382"
 
     def setup(self) -> None:
         if options.sycl is None:
@@ -269,6 +269,7 @@ def benchmarks(self) -> list[Benchmark]:
                 )
             )
 
+        # Add RecordAndReplay benchmarks
         record_and_replay_params = product([0, 1], [0, 1])
         for emulate, instantiate in record_and_replay_params:
 
@@ -315,6 +316,39 @@ def createRrBench(variant_name: str, **kwargs):
                 ),
             ]
 
+        # Add TorchMultiQueue benchmarks
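+        # Register the KernelSubmitMultiQueue test for every runtime except plain UR.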
+        for runtime in filter(lambda x: x != RUNTIMES.UR, RUNTIMES):
+
+            def createTorchMultiQueueBench(variant_name: str, **kwargs):
+                return TorchMultiQueue(
+                    self,
+                    runtime,
+                    variant_name,
+                    PROFILERS.TIMER,
+                    **kwargs,
+                )
+
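+            # Three size presets: workgroup count, workgroup size, and kernels per queue.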
+            benches += [
+                createTorchMultiQueueBench(
+                    "large",
+                    workgroupCount=4096,
+                    workgroupSize=512,
+                    kernelsPerQueue=20,
+                ),
+                createTorchMultiQueueBench(
+                    "medium",
+                    workgroupCount=512,
+                    workgroupSize=256,
+                    kernelsPerQueue=10,
+                ),
+                createTorchMultiQueueBench(
+                    "small",
+                    workgroupCount=256,
+                    workgroupSize=124,
+                    kernelsPerQueue=4,
+                ),
+            ]
+
         # Add UR-specific benchmarks
         benches += [
             # TODO: multithread_benchmark_ur fails with segfault
@@ -735,6 +769,48 @@ def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
         return [f"--{k}={v}" for k, v in self._rr_params.items()]
 
 
+class TorchMultiQueue(ComputeBenchmark):
+    def __init__(
+        self, suite, runtime: RUNTIMES, variant_name: str, profiler_type, **kwargs
+    ):
+        self._variant_name = variant_name
+        self._smq_params = kwargs
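+        # Use a much smaller iteration count when tracing (assumed to keep traces manageable).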
+        self._iterations_regular = 1000
+        self._iterations_trace = 10
+        super().__init__(
+            suite,
+            f"torch_benchmark_{runtime.value}",
+            "KernelSubmitMultiQueue",
+            runtime,
+            profiler_type,
+        )
+
+    def name(self):
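+        # Build a stable benchmark name from the sorted "key value" pairs of the variant parameters.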
+        ret = []
+        for k, v in self._smq_params.items():
+            ret.append(f"{k} {v}")
+        ret.sort()
+        return self._bench_name + " " + ", ".join(ret)
+
+    def display_name(self) -> str:
+        return f"{self.explicit_group()} {self._runtime.value}"
+
+    def explicit_group(self):
+        return f"{self._test} {self._variant_name}"
+
+    def get_tags(self):
+        return ["pytorch_" + runtime_to_tag_name(self._runtime)]
+
+    def _supported_runtimes(self) -> list[RUNTIMES]:
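+        # In addition to the defaults, allow the sycl-preview runtime.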
+        return super()._supported_runtimes() + [RUNTIMES.SYCL_PREVIEW]
+
+    def _bin_args(self, run_trace: TracingType = TracingType.NONE) -> list[str]:
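+        # Pass the iteration count and the variant parameters as CLI flags to the benchmark binary.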
+        iters = self._get_iters(run_trace)
+        return [f"--iterations={iters}"] + [
+            f"--{k}={v}" for k, v in self._smq_params.items()
+        ]
+
+
 class QueueInOrderMemcpy(ComputeBenchmark):
     def __init__(self, bench, isCopyOnly, source, destination, size, profiler_type):
         self._is_copy_only = isCopyOnly