@@ -33,15 +33,8 @@ function cudaDeviceGetSharedMemConfig(pConfig)
3333 pConfig)
3434end
3535
36- if VERSION >= v " 1.2.0-DEV.512"
37- function cudaDeviceSynchronize ()
38- ccall (" extern cudaDeviceSynchronize" , llvmcall, cudaError_t, ())
39- end
40- else
41- @eval cudaDeviceSynchronize () = Base. llvmcall (
42- $ (" declare i32 @cudaDeviceSynchronize()" ,
43- " %rv = call i32 @cudaDeviceSynchronize()
44- ret i32 %rv" ), cudaError_t, Tuple{})
36+ function cudaDeviceSynchronize ()
37+ ccall (" extern cudaDeviceSynchronize" , llvmcall, cudaError_t, ())
4538end
4639
4740function cudaGetLastError ()
@@ -76,37 +69,16 @@ function cudaGetDevice(device)
7669 device)
7770end
7871
79- if VERSION >= v " 1.2.0-DEV.512"
80- function cudaStreamCreateWithFlags (pStream, flags)
81- ccall (" extern cudaStreamCreateWithFlags" , llvmcall, cudaError_t,
82- (Ptr{cudaStream_t}, UInt32),
83- pStream, flags)
84- end
85- else
86- @eval cudaStreamCreateWithFlags (pStream, flags) = Base. llvmcall (
87- $ (" declare i32 @cudaStreamCreateWithFlags(i8**, i32)" ,
88- " %pStream = inttoptr i$WORD_SIZE %0 to i8**
89- %rv = call i32 @cudaStreamCreateWithFlags(i8** %pStream, i32 %1)
90- ret i32 %rv" ), cudaError_t,
91- Tuple{Ptr{cudaStream_t}, UInt32},
92- Base. unsafe_convert (Ptr{cudaStream_t}, Base. cconvert (Ptr{cudaStream_t}, pStream)),
93- Base. unsafe_convert (UInt32, Base. cconvert (UInt32, flags)))
94- end
95-
96- if VERSION >= v " 1.2.0-DEV.512"
97- function cudaStreamDestroy (stream)
98- ccall (" extern cudaStreamDestroy" , llvmcall, cudaError_t,
99- (cudaStream_t,),
100- stream)
101- end
102- else
103- @eval cudaStreamDestroy (stream) = Base. llvmcall (
104- $ (" declare i32 @cudaStreamDestroy(i8*)" ,
105- " %stream = inttoptr i$WORD_SIZE %0 to i8*
106- %rv = call i32 @cudaStreamDestroy(i8* %stream)
107- ret i32 %rv" ), cudaError_t,
108- Tuple{cudaStream_t},
109- Base. unsafe_convert (cudaStream_t, Base. cconvert (cudaStream_t, stream)))
72+ function cudaStreamCreateWithFlags (pStream, flags)
73+ ccall (" extern cudaStreamCreateWithFlags" , llvmcall, cudaError_t,
74+ (Ptr{cudaStream_t}, UInt32),
75+ pStream, flags)
76+ end
77+
78+ function cudaStreamDestroy (stream)
79+ ccall (" extern cudaStreamDestroy" , llvmcall, cudaError_t,
80+ (cudaStream_t,),
81+ stream)
11082end
11183
11284function cudaStreamWaitEvent (stream, event, flags)
@@ -249,31 +221,10 @@ function cudaGetParameterBuffer(alignment, size)
249221 alignment, size)
250222end
251223
252- if VERSION >= v " 1.2.0-DEV.512"
253- function cudaGetParameterBufferV2 (func, gridDimension, blockDimension, sharedMemSize)
254- ccall (" extern cudaGetParameterBufferV2" , llvmcall, Ptr{Cvoid},
255- (Ptr{Cvoid}, dim3, dim3, UInt32),
256- func, gridDimension, blockDimension, sharedMemSize)
257- end
258- else
259- @eval cudaGetParameterBufferV2 (func, gridDimension, blockDimension, sharedMemSize) =
260- Base. llvmcall (
261- $ (" declare i8* @cudaGetParameterBufferV2(i8*, {i32,i32,i32}, {i32,i32,i32}, i32)" ,
262- " %func = inttoptr i$WORD_SIZE %0 to i8*
263- %gridDimension.x = insertvalue { i32, i32, i32 } undef, i32 %1, 0
264- %gridDimension.y = insertvalue { i32, i32, i32 } %gridDimension.x, i32 %2, 1
265- %gridDimension.z = insertvalue { i32, i32, i32 } %gridDimension.y, i32 %3, 2
266- %blockDimension.x = insertvalue { i32, i32, i32 } undef, i32 %4, 0
267- %blockDimension.y = insertvalue { i32, i32, i32 } %blockDimension.x, i32 %5, 1
268- %blockDimension.z = insertvalue { i32, i32, i32 } %blockDimension.y, i32 %6, 2
269- %rv = call i8* @cudaGetParameterBufferV2(i8* %func, {i32,i32,i32} %gridDimension.z, {i32,i32,i32} %blockDimension.z, i32 %7)
270- %buf = ptrtoint i8* %rv to i$WORD_SIZE
271- ret i$WORD_SIZE %buf" ), Ptr{Cvoid},
272- Tuple{Ptr{Cvoid}, Cuint, Cuint, Cuint, Cuint, Cuint, Cuint, Cuint},
273- Base. unsafe_convert (Ptr{Cvoid}, Base. cconvert (Ptr{Cvoid}, func)),
274- gridDimension. x, gridDimension. y, gridDimension. z, # known to be Cuint
275- blockDimension. x, blockDimension. y, blockDimension. z, # known to be Cuint
276- Base. unsafe_convert (Cuint, Base. cconvert (Cuint, sharedMemSize)))
224+ function cudaGetParameterBufferV2 (func, gridDimension, blockDimension, sharedMemSize)
225+ ccall (" extern cudaGetParameterBufferV2" , llvmcall, Ptr{Cvoid},
226+ (Ptr{Cvoid}, dim3, dim3, UInt32),
227+ func, gridDimension, blockDimension, sharedMemSize)
277228end
278229
279230function cudaLaunchDevice_ptsz (func, parameterBuffer, gridDimension, blockDimension,
@@ -296,23 +247,10 @@ function cudaLaunchDevice(func, parameterBuffer, gridDimension, blockDimension,
296247 func, parameterBuffer, gridDimension, blockDimension, sharedMemSize, stream)
297248end
298249
299- if VERSION >= v " 1.2.0-DEV.512"
300- function cudaLaunchDeviceV2 (parameterBuffer, stream)
301- ccall (" extern cudaLaunchDeviceV2" , llvmcall, cudaError_t,
302- (Ptr{Cvoid}, cudaStream_t),
303- parameterBuffer, stream)
304- end
305- else
306- @eval cudaLaunchDeviceV2 (parameterBuffer, stream) =
307- Base. llvmcall (
308- $ (" declare i32 @cudaLaunchDeviceV2(i8*, i8*)" ,
309- " %buf = inttoptr i$WORD_SIZE %0 to i8*
310- %stream = inttoptr i$WORD_SIZE %1 to i8*
311- %rv = call i32 @cudaLaunchDeviceV2(i8* %buf, i8* %stream)
312- ret i32 %rv" ), cudaError_t,
313- Tuple{Ptr{Cvoid}, cudaStream_t},
314- Base. unsafe_convert (Ptr{Cvoid}, Base. cconvert (Ptr{Cvoid}, parameterBuffer)),
315- Base. unsafe_convert (cudaStream_t, Base. cconvert (cudaStream_t, stream)))
250+ function cudaLaunchDeviceV2 (parameterBuffer, stream)
251+ ccall (" extern cudaLaunchDeviceV2" , llvmcall, cudaError_t,
252+ (Ptr{Cvoid}, cudaStream_t),
253+ parameterBuffer, stream)
316254end
317255
318256function cudaOccupancyMaxActiveBlocksPerMultiprocessor (numBlocks, func, blockSize,
@@ -329,35 +267,16 @@ function cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, func,
329267 numBlocks, func, blockSize, dynamicSmemSize, flags)
330268end
331269
332- if VERSION >= v " 1.2.0-DEV.512"
333- function cudaCGGetIntrinsicHandle (scope)
334- ccall (" extern cudaCGGetIntrinsicHandle" , llvmcall, Culonglong,
335- (cudaCGScope,),
336- scope)
337- end
338- else
339- @eval cudaCGGetIntrinsicHandle (scope) = Base. llvmcall (
340- $ (" declare i64 @cudaCGGetIntrinsicHandle(i32)" ,
341- " %rv = call i64 @cudaCGGetIntrinsicHandle(i32 1)
342- ret i64 %rv" ), Culonglong,
343- Tuple{cudaCGScope},
344- Base. unsafe_convert (cudaCGScope, Base. cconvert (cudaCGScope, scope)))
345- end
346-
347- if VERSION >= v " 1.2.0-DEV.512"
348- function cudaCGSynchronize (handle, flags)
349- ccall (" extern cudaCGSynchronize" , llvmcall, cudaError_t,
350- (Culonglong, UInt32),
351- handle, flags)
352- end
353- else
354- @eval cudaCGSynchronize (handle, flags) = Base. llvmcall (
355- $ (" declare i32 @cudaCGSynchronize(i64, i32)" ,
356- " %rv = call i32 @cudaCGSynchronize(i64 %0, i32 %1)
357- ret i32 %rv" ), cudaError_t,
358- Tuple{Culonglong, UInt32},
359- Base. unsafe_convert (Culonglong, Base. cconvert (Culonglong, handle)),
360- Base. unsafe_convert (UInt32, Base. cconvert (UInt32, flags)))
270+ function cudaCGGetIntrinsicHandle (scope)
271+ ccall (" extern cudaCGGetIntrinsicHandle" , llvmcall, Culonglong,
272+ (cudaCGScope,),
273+ scope)
274+ end
275+
276+ function cudaCGSynchronize (handle, flags)
277+ ccall (" extern cudaCGSynchronize" , llvmcall, cudaError_t,
278+ (Culonglong, UInt32),
279+ handle, flags)
361280end
362281
363282function cudaCGSynchronizeGrid (handle, flags)
0 commit comments