From 2da83fa854f3e83ee884a61c8578b649c09b704b Mon Sep 17 00:00:00 2001
From: Anton Oresten
Date: Fri, 28 Nov 2025 18:58:06 +0100
Subject: [PATCH] add methods for BFloat16 type with tests in cuDNN

---
 lib/cudnn/Project.toml       | 4 +++-
 lib/cudnn/src/util.jl        | 6 ++++++
 lib/cudnn/test/Project.toml  | 1 +
 lib/cudnn/test/activation.jl | 8 ++++++++
 lib/cudnn/test/pooling.jl    | 2 ++
 lib/cudnn/test/softmax.jl    | 7 +++++++
 6 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/lib/cudnn/Project.toml b/lib/cudnn/Project.toml
index 686e387b9f..e856197c67 100644
--- a/lib/cudnn/Project.toml
+++ b/lib/cudnn/Project.toml
@@ -1,15 +1,17 @@
 name = "cuDNN"
 uuid = "02a925ec-e4fe-4b08-9a7e-0d78e3d38ccd"
-authors = ["Tim Besard <tim.besard@gmail.com>"]
 version = "1.4.6"
+authors = ["Tim Besard <tim.besard@gmail.com>"]
 
 [deps]
+BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
 CEnum = "fa961155-64e5-5f13-b03f-caf6b980ea82"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CUDA_Runtime_Discovery = "1af6417a-86b4-443c-805f-a4643ffb695f"
 CUDNN_jll = "62b44479-cb7b-5706-934f-f13b2eb2e645"
 
 [compat]
+BFloat16s = "0.6.0"
 CEnum = "0.2, 0.3, 0.4, 0.5"
 CUDA = "~5.9"
 CUDA_Runtime_Discovery = "0.2, 0.3, 1"
diff --git a/lib/cudnn/src/util.jl b/lib/cudnn/src/util.jl
index 2ee1034bcb..8923ff9b56 100644
--- a/lib/cudnn/src/util.jl
+++ b/lib/cudnn/src/util.jl
@@ -1,12 +1,16 @@
+using BFloat16s: BFloat16
+
 # For low level cudnn functions that require a pointer to a number
 cptr(x,a::DenseCuArray{Float64})=Float64[x]
 cptr(x,a::DenseCuArray{Float32})=Float32[x]
 cptr(x,a::DenseCuArray{Float16})=Float32[x]
+cptr(x,a::DenseCuArray{BFloat16})=Float32[x]
 
 # Conversion between Julia and cuDNN datatypes
 cudnnDataType(::Type{Float16})=CUDNN_DATA_HALF
 cudnnDataType(::Type{Float32})=CUDNN_DATA_FLOAT
 cudnnDataType(::Type{Float64})=CUDNN_DATA_DOUBLE
+cudnnDataType(::Type{BFloat16})=CUDNN_DATA_BFLOAT16
 cudnnDataType(::Type{Int8}) = CUDNN_DATA_INT8
 cudnnDataType(::Type{UInt8}) = CUDNN_DATA_UINT8
 cudnnDataType(::Type{Int32}) = CUDNN_DATA_INT32
@@ -17,6 +21,7 @@ cudnnDataType(::Type{Int32}) = CUDNN_DATA_INT32
 juliaDataType(a)=(a==CUDNN_DATA_HALF ? Float16 :
                   a==CUDNN_DATA_FLOAT ? Float32 :
                   a==CUDNN_DATA_DOUBLE ? Float64 :
+                  a==CUDNN_DATA_BFLOAT16 ? BFloat16 :
                   a==CUDNN_DATA_INT8 ? Int8 :
                   a==CUDNN_DATA_UINT8 ? UInt8 :
                   a==CUDNN_DATA_INT32 ? Int32 : error())
@@ -35,6 +40,7 @@ scalingParameter(T, val) = error("Unknown tensor type $T")
 scalingParameter(::Type{Float16}, val) = Ref{Float32}(val)
 scalingParameter(::Type{Float32}, val) = Ref{Float32}(val)
 scalingParameter(::Type{Float64}, val) = Ref{Float64}(val)
+scalingParameter(::Type{BFloat16}, val) = Ref{Float32}(val)
 
 # Create temporary reserveSpace. Use 128 to avoid alignment issues.
diff --git a/lib/cudnn/test/Project.toml b/lib/cudnn/test/Project.toml
index e11819f7ea..42ab9f7e53 100644
--- a/lib/cudnn/test/Project.toml
+++ b/lib/cudnn/test/Project.toml
@@ -1,4 +1,5 @@
 [deps]
+BFloat16s = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
 CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
 Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
diff --git a/lib/cudnn/test/activation.jl b/lib/cudnn/test/activation.jl
index 14a8530f43..7b7f2f01a8 100644
--- a/lib/cudnn/test/activation.jl
+++ b/lib/cudnn/test/activation.jl
@@ -1,3 +1,4 @@
+using BFloat16s: BFloat16
 using cuDNN:
     cudnnActivationForward,
     cudnnActivationForward!,
@@ -59,3 +60,10 @@ activationtest(coef=2,mode=CUDNN_ACTIVATION_CLIPPED_RELU)
 activationtest(coef=2,mode=CUDNN_ACTIVATION_ELU)
 activationtest(alpha=2)
 activationtest(beta=2)
+
+# BFloat16 tests
+(ax,ay) = randn.(BFloat16, (10,10))
+(cx,cy) = CuArray.((ax,ay))
+activationtest(mode=CUDNN_ACTIVATION_SIGMOID)
+activationtest(mode=CUDNN_ACTIVATION_RELU)
+activationtest(mode=CUDNN_ACTIVATION_TANH)
diff --git a/lib/cudnn/test/pooling.jl b/lib/cudnn/test/pooling.jl
index 3c88ac69f2..af6c13a333 100644
--- a/lib/cudnn/test/pooling.jl
+++ b/lib/cudnn/test/pooling.jl
@@ -1,4 +1,5 @@
 using CUDA, Random
+using BFloat16s: BFloat16
 import NNlib
 using cuDNN:
     cudnnPoolingForward,
@@ -87,5 +88,6 @@ pooltest(padding = 1)
 pooltest(stride = 1)
 pooltest(format = CUDNN_TENSOR_NHWC)
 pooltest(dataType = Float16)
+pooltest(dataType = BFloat16)
 pooltest(alpha = 2)
 pooltest(beta = 2)
diff --git a/lib/cudnn/test/softmax.jl b/lib/cudnn/test/softmax.jl
index b428ee2217..68967bc1dd 100644
--- a/lib/cudnn/test/softmax.jl
+++ b/lib/cudnn/test/softmax.jl
@@ -1,3 +1,4 @@
+using BFloat16s: BFloat16
 using cuDNN:
     cudnnSoftmaxForward,
     cudnnSoftmaxForward!,
@@ -43,3 +44,9 @@ softmaxtest(mode=CUDNN_SOFTMAX_MODE_CHANNEL)
 softmaxtest(algo=CUDNN_SOFTMAX_FAST)
 softmaxtest(algo=CUDNN_SOFTMAX_ACCURATE)
 softmaxtest(algo=CUDNN_SOFTMAX_LOG)
+
+# BFloat16 tests
+ax,ay = randn(BFloat16,10,10),randn(BFloat16,10,10)
+cx,cy = CuArray.((ax,ay))
+softmaxtest()
+softmaxtest(algo=CUDNN_SOFTMAX_LOG)
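Note: for reviewers, a minimal usage sketch of what this patch enables. It is
not part of the diff; it assumes a cuDNN build that supports
CUDNN_DATA_BFLOAT16 and uses the same keyword APIs the tests above exercise
(the explicit `mode =` keyword mirrors the activationtest calls):

    using CUDA, cuDNN
    using BFloat16s: BFloat16
    using cuDNN: cudnnSoftmaxForward, cudnnActivationForward,
                 CUDNN_ACTIVATION_RELU

    # BFloat16 CuArrays now map to CUDNN_DATA_BFLOAT16 via cudnnDataType,
    # and scalingParameter passes alpha/beta as Float32, so the high-level
    # entry points accept them directly:
    x = CuArray(randn(BFloat16, 10, 10))
    y = cudnnSoftmaxForward(x)                  # default algo/mode, BFloat16 in/out
    r = cudnnActivationForward(x; mode = CUDNN_ACTIVATION_RELU)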