@@ -35,13 +35,13 @@ const active_xt_handles = Vector{Union{Nothing,cublasXtHandle_t}}()
 function handle()
     tid = Threads.threadid()
     if @inbounds active_handles[tid] === nothing
-        context = CuCurrentContext()
-        active_handles[tid] = get!(created_handles, context) do
+        ctx = context()
+        active_handles[tid] = get!(created_handles, ctx) do
             handle = cublasCreate_v2()
-            atexit(()->CUDAdrv.isvalid(context) && cublasDestroy_v2(handle))
+            atexit(()->CUDAdrv.isvalid(ctx) && cublasDestroy_v2(handle))

             # enable tensor math mode if our device supports it, and fast math is enabled
-            dev = CUDAdrv.device(context)
+            dev = CUDAdrv.device()
             if Base.JLOptions().fast_math == 1 && CUDAdrv.capability(dev) >= v"7.0" && version() >= v"9"
                 cublasSetMathMode(CUBLAS_TENSOR_OP_MATH, handle)
             end
@@ -55,11 +55,10 @@
 function xt_handle()
     tid = Threads.threadid()
     if @inbounds active_xt_handles[tid] === nothing
-        CUDAnative.maybe_initialize("cublasXtGetHandle")
-        context = CuCurrentContext()
-        active_xt_handles[tid] = get!(created_xt_handles, context) do
+        ctx = context()
+        active_xt_handles[tid] = get!(created_xt_handles, ctx) do
             handle = cublasXtCreate()
-            atexit(()->CUDAdrv.isvalid(context) && cublasXtDestroy(handle))
+            atexit(()->CUDAdrv.isvalid(ctx) && cublasXtDestroy(handle))

             # select the devices
             # TODO: this is weird, since we typically use a single device per thread/context
@@ -79,7 +78,7 @@ function __init__()
     resize!(active_xt_handles, Threads.nthreads())
     fill!(active_xt_handles, nothing)

-    CUDAnative.atcontextswitch() do tid, ctx, dev
+    CUDAnative.atcontextswitch() do tid, ctx
         # we don't eagerly initialize handles, but do so lazily when requested
         active_handles[tid] = nothing
         active_xt_handles[tid] = nothing
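
For reference, a minimal, self-contained sketch of the lazy per-thread/per-context handle caching pattern these functions implement. The `FakeContext`, `fake_create`, `fake_destroy`, `fake_isvalid`, `sketch_handle`, and `sketch_init` names are placeholders standing in for the real CUDA context and cuBLAS calls (`context()`, `cublasCreate_v2`, `cublasDestroy_v2`, `CUDAdrv.isvalid`); this illustrates the pattern, not the package's API.

```julia
# Sketch: one slot per thread, one handle per context, destroyed at exit.
struct FakeContext
    id::Int
end

fake_create()        = rand(UInt)   # stands in for cublasCreate_v2()
fake_destroy(handle) = nothing      # stands in for cublasDestroy_v2(handle)
fake_isvalid(ctx)    = true         # stands in for CUDAdrv.isvalid(ctx)

const created_handles = Dict{FakeContext,UInt}()
const active_handles  = Vector{Union{Nothing,UInt}}()

function sketch_handle(ctx::FakeContext)
    tid = Threads.threadid()
    if active_handles[tid] === nothing
        # create the handle for this context, or reuse an existing one
        active_handles[tid] = get!(created_handles, ctx) do
            handle = fake_create()
            atexit(() -> fake_isvalid(ctx) && fake_destroy(handle))
            handle
        end
    end
    return active_handles[tid]
end

function sketch_init()
    resize!(active_handles, Threads.nthreads())
    fill!(active_handles, nothing)   # handles are created lazily on first use
end

# usage
sketch_init()
h = sketch_handle(FakeContext(1))
@assert h === sketch_handle(FakeContext(1))   # cached per thread/context
```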