@@ -51,7 +51,7 @@ function Base.getindex(dims::ROCDim3, idx::Int)
5151end
5252
5353"""
54- launch(queue::HSAQueue , signal::HSASignal , f::ROCFunction,
54+ launch(queue::RuntimeQueue , signal::RuntimeEvent , f::ROCFunction,
5555 groupsize::ROCDim, gridsize::ROCDim, args...)
5656
5757Low-level call to launch a ROC function `f` on the GPU, using `groupsize` and
@@ -63,7 +63,7 @@ copied to the internal kernel parameter buffer, or a pointer to device memory.
6363
6464This is a low-level call; prefer [`roccall`](@ref) instead.
6565"""
66- @inline function launch (queue:: HSAQueue , signal:: HSASignal , f:: ROCFunction ,
66+ @inline function launch (queue:: RuntimeQueue , signal:: RuntimeEvent , f:: ROCFunction ,
6767 groupsize:: ROCDim , gridsize:: ROCDim , args... )
6868 groupsize = ROCDim3 (groupsize)
6969 gridsize = ROCDim3 (gridsize)
7777
7878# we need a generated function to get an args array,
7979# without having to inspect the types at runtime
80- @generated function _launch (queue:: HSAQueue , signal:: HSASignal , f:: ROCFunction ,
80+ @generated function _launch (queue:: RuntimeQueue , signal:: RuntimeEvent , f:: ROCFunction ,
8181 groupsize:: ROCDim3 , gridsize:: ROCDim3 ,
8282 args:: NTuple{N,Any} ) where N
8383
@@ -101,31 +101,21 @@ end
101101 GC. @preserve $ (arg_refs... ) begin
102102 kernelParams = [$ (arg_ptrs... )]
103103
104- # link with ld.lld
105- ld_path = HSARuntime. ld_lld_path
106- @assert ld_path != " " " ld.lld was not found; cannot link kernel"
107- # TODO : Do this more idiomatically
108- io = open (" /tmp/amdgpu-dump.o" , " w" )
109- write (io, f. mod. data)
110- close (io)
111- run (` $ld_path -shared -o /tmp/amdgpu.exe /tmp/amdgpu-dump.o` )
112- io = open (" /tmp/amdgpu.exe" , " r" )
113- data = read (io)
114- close (io)
115-
116- # generate executable and kernel instance
117- exe = HSAExecutable (queue. agent, data, f. entry)
118- kern = HSAKernelInstance (queue. agent, exe, f. entry, args)
119- HSARuntime. launch! (queue, kern, signal;
120- workgroup_size= groupsize, grid_size= gridsize)
104+ # create executable and kernel instance
105+ exe = create_executable (get_device (queue), f)
106+ kern = create_kernel (get_device (queue), exe, f. entry, args)
107+
108+ # launch kernel
109+ launch_kernel (queue, kern, signal;
110+ groupsize= groupsize, gridsize= gridsize)
121111 end
122112 end ). args)
123113
124114 return ex
125115end
126116
127117"""
128- roccall(queue::HSAQueue , signal::HSASignal , f::ROCFunction, types, values...;
118+ roccall(queue::RuntimeQueue , signal::RuntimeEvent , f::ROCFunction, types, values...;
129119 groupsize::ROCDim, gridsize::ROCDim)
130120
131121`ccall`-like interface for launching a ROC function `f` on a GPU.
@@ -151,14 +141,14 @@ being slightly faster.
151141"""
152142roccall
153143
154- @inline function roccall (queue:: HSAQueue , signal:: HSASignal , f:: ROCFunction , types:: NTuple{N,DataType} , values:: Vararg{Any,N} ;
@inline function roccall(queue::RuntimeQueue, signal::RuntimeEvent, f::ROCFunction,
                         types::NTuple{N,DataType}, values::Vararg{Any,N};
                         kwargs...) where {N}
    # `types` is a tuple of plain `DataType`s, so the resulting tuple type is not
    # inferable here; the `@generated` `_roccall` is consequently expanded upon
    # first use instead of ahead of time.
    tt = Tuple{types...}
    return _roccall(queue, signal, f, tt, values; kwargs...)
end
160150
161- @inline function roccall (queue:: HSAQueue , signal:: HSASignal , f:: ROCFunction , tt:: Type , values:: Vararg{Any,N} ;
151+ @inline function roccall (queue:: RuntimeQueue , signal:: RuntimeEvent , f:: ROCFunction , tt:: Type , values:: Vararg{Any,N} ;
162152 kwargs... ) where N
163153 # in this case, the type of `tt` is `Tuple{<:DataType,...}`,
164154 # which means the generated function can be expanded earlier
167157
168158# we need a generated function to get a tuple of converted arguments (using unsafe_convert),
169159# without having to inspect the types at runtime
170- @generated function _roccall (queue:: HSAQueue , signal:: HSASignal , f:: ROCFunction , tt:: Type , args:: NTuple{N,Any} ;
160+ @generated function _roccall (queue:: RuntimeQueue , signal:: RuntimeEvent , f:: ROCFunction , tt:: Type , args:: NTuple{N,Any} ;
171161 groupsize:: ROCDim = 1 , gridsize:: ROCDim = 1 ) where N
172162
173163 # the type of `tt` is Type{Tuple{<:DataType...}}
0 commit comments