From d86c2300aee9b8bf4a6a165f0bcbc0b7a89c6335 Mon Sep 17 00:00:00 2001 From: luciechoi Date: Mon, 10 Nov 2025 23:36:47 +0000 Subject: [PATCH 1/4] Fix reconvergence test to be agnostic to wave size --- .../subgroup_uniform_control_flow.test | 39 ++++++++++++------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test b/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test index 3af12646..e9bcc896 100644 --- a/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test +++ b/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test @@ -1,22 +1,31 @@ #--- source.hlsl -RWStructuredBuffer<uint> Out : register(u0); +RWStructuredBuffer<float> Out : register(u0); [numthreads(8,1,1)] void main(uint3 TID : SV_GroupThreadID) { + float maxActiveLaneCount = min(WaveGetLaneCount(), 8); + float div1 = 1.0/maxActiveLaneCount; + float div2 = 2.0/maxActiveLaneCount; + float div4 = 4.0/maxActiveLaneCount; + float div8 = 8.0/maxActiveLaneCount; + + float result = 0; + // First non-uniform branch - if (TID.x < 4) { + if (TID.x % 2 == 0) { // Second non-uniform branch - if (TID.x % 2 == 0) { - Out[TID.x] = WaveActiveSum(TID.x); + if (TID.x % 4 == 0) { + result += WaveActiveSum(div4); } else { - Out[TID.x] = WaveActiveMax(TID.x); + result += WaveActiveSum(div8); } // Must reconverge here with maximal reconvergence - Out[TID.x] += WaveActiveMax(TID.x); - } else { - Out[4] = WaveActiveMax(TID.x); + result += WaveActiveSum(div2); } - Out[TID.x] += WaveActiveMax(TID.x); + + // Must reconverge here with maximal reconvergence + result += WaveActiveSum(div1); + Out[TID.x] = result; } //--- pipeline.yaml @@ -28,7 +37,7 @@ Shaders: DispatchSize: [1, 1, 1] Buffers: - Name: Out - Format: UInt32 + Format: Float32 Data: [ 0, 0, 0, 0, 0, 0, 0, 0 ] DescriptorSets: - Resources: @@ -44,16 +53,16 @@ DescriptorSets: #--- end # UNSUPPORTED: Vulkan && !VK_KHR_shader_maximal_reconvergence -# BUG: 
https://github.com/llvm/offload-test-suite/issues/490 -# XFAIL: WARP && DirectX - # BUG: https://github.com/llvm/llvm-project/issues/165288 # XFAIL: !WARP && Clang && (DirectX || Metal) +# BUG: https://github.com/llvm/llvm-project/issues/99159 +# XFAIL: Clang + # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_0 -fspv-enable-maximal-reconvergence -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s # CHECK: Name: Out -# CHECK: Format: UInt32 -# CHECK: Data: [ 12, 13, 12, 13, 14, 7, 7, 7 ] +# CHECK: Format: Float32 +# CHECK: Data: [ 3, 1, 4, 1, 3, 1, 4, 1 ] From 50f92ef22e43f5f6bc32edda642f12910fc42f00 Mon Sep 17 00:00:00 2001 From: luciechoi Date: Tue, 11 Nov 2025 18:29:10 +0000 Subject: [PATCH 2/4] Revert back to WaveActiveSum(1) --- .../MaximalReconvergence/subgroup_uniform_control_flow.test | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test b/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test index e9bcc896..74a489d3 100644 --- a/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test +++ b/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test @@ -1,9 +1,13 @@ #--- source.hlsl RWStructuredBuffer<float> Out : register(u0); +// https://www.khronos.org/blog/khronos-releases-maximal-reconvergence-and-quad-control-extensions-for-vulkan-and-spir-v +// This tests that waves are correctly reconverging +// after nested non-uniform branches. 
+ [numthreads(8,1,1)] void main(uint3 TID : SV_GroupThreadID) { - float maxActiveLaneCount = min(WaveGetLaneCount(), 8); + float maxActiveLaneCount = min(WaveActiveSum(1), 8); float div1 = 1.0/maxActiveLaneCount; float div2 = 2.0/maxActiveLaneCount; float div4 = 4.0/maxActiveLaneCount; From 6a2444e1166a0f1614a8db62c553ccd01feaa3ba Mon Sep 17 00:00:00 2001 From: luciechoi Date: Tue, 11 Nov 2025 19:41:36 +0000 Subject: [PATCH 3/4] Fix to force compilers to hoist blocks outside branches --- .../subgroup_uniform_control_flow.test | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test b/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test index 74a489d3..6cc27f75 100644 --- a/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test +++ b/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test @@ -1,16 +1,15 @@ #--- source.hlsl RWStructuredBuffer<float> Out : register(u0); -// https://www.khronos.org/blog/khronos-releases-maximal-reconvergence-and-quad-control-extensions-for-vulkan-and-spir-v -// This tests that waves are correctly reconverging -// after nested non-uniform branches. +// This test checks that compilers do not perform optimizations, such as code hoisting, +// that may change the expected behavior when `-fspv-enable-maximal-reconvergence` flag +// is set. +// Here, all the blocks inside the branches are the same but they should not be taken out +// of the branch because it will change the result. 
[numthreads(8,1,1)] void main(uint3 TID : SV_GroupThreadID) { float maxActiveLaneCount = min(WaveActiveSum(1), 8); - float div1 = 1.0/maxActiveLaneCount; - float div2 = 2.0/maxActiveLaneCount; - float div4 = 4.0/maxActiveLaneCount; float div8 = 8.0/maxActiveLaneCount; float result = 0; @@ -19,16 +18,19 @@ void main(uint3 TID : SV_GroupThreadID) { if (TID.x % 2 == 0) { // Second non-uniform branch if (TID.x % 4 == 0) { - result += WaveActiveSum(div4); + result += WaveActiveSum(div8); } else { result += WaveActiveSum(div8); } // Must reconverge here with maximal reconvergence - result += WaveActiveSum(div2); + result += WaveActiveSum(div8); + } else { + result += WaveActiveSum(div8); + result += WaveActiveSum(div8); } // Must reconverge here with maximal reconvergence - result += WaveActiveSum(div1); + result += WaveActiveSum(div8); Out[TID.x] = result; } @@ -60,13 +62,10 @@ DescriptorSets: # BUG: https://github.com/llvm/llvm-project/issues/165288 # XFAIL: !WARP && Clang && (DirectX || Metal) -# BUG: https://github.com/llvm/llvm-project/issues/99159 -# XFAIL: Clang - # RUN: split-file %s %t # RUN: %dxc_target -T cs_6_0 -fspv-enable-maximal-reconvergence -Fo %t.o %t/source.hlsl # RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s # CHECK: Name: Out # CHECK: Format: Float32 -# CHECK: Data: [ 3, 1, 4, 1, 3, 1, 4, 1 ] +# CHECK: Data: [ 14, 16, 14, 16, 14, 16, 14, 16 ] From 235c407a5f74476ba01d419c01c112c49478c19b Mon Sep 17 00:00:00 2001 From: luciechoi Date: Thu, 13 Nov 2025 19:45:09 +0000 Subject: [PATCH 4/4] Remove --- .../subgroup_uniform_control_flow.test | 71 ------------------- 1 file changed, 71 deletions(-) delete mode 100644 test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test diff --git a/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test b/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test deleted file mode 100644 index 6cc27f75..00000000 --- 
a/test/Feature/MaximalReconvergence/subgroup_uniform_control_flow.test +++ /dev/null @@ -1,71 +0,0 @@ -#--- source.hlsl -RWStructuredBuffer<float> Out : register(u0); - -// This test checks that compilers do not perform optimizations, such as code hoisting, -// that may change the expected behavior when `-fspv-enable-maximal-reconvergence` flag -// is set. -// Here, all the blocks inside the branches are the same but they should not be taken out -// of the branch because it will change the result. - -[numthreads(8,1,1)] -void main(uint3 TID : SV_GroupThreadID) { - float maxActiveLaneCount = min(WaveActiveSum(1), 8); - float div8 = 8.0/maxActiveLaneCount; - - float result = 0; - - // First non-uniform branch - if (TID.x % 2 == 0) { - // Second non-uniform branch - if (TID.x % 4 == 0) { - result += WaveActiveSum(div8); - } else { - result += WaveActiveSum(div8); - } - // Must reconverge here with maximal reconvergence - result += WaveActiveSum(div8); - } else { - result += WaveActiveSum(div8); - result += WaveActiveSum(div8); - } - - // Must reconverge here with maximal reconvergence - result += WaveActiveSum(div8); - Out[TID.x] = result; -} - -//--- pipeline.yaml - ---- -Shaders: - - Stage: Compute - Entry: main - DispatchSize: [1, 1, 1] -Buffers: - - Name: Out - Format: Float32 - Data: [ 0, 0, 0, 0, 0, 0, 0, 0 ] -DescriptorSets: - - Resources: - - Name: Out - Kind: RWStructuredBuffer - DirectXBinding: - Register: 0 - Space: 0 - VulkanBinding: - Binding: 0 -... - -#--- end -# UNSUPPORTED: Vulkan && !VK_KHR_shader_maximal_reconvergence - -# BUG: https://github.com/llvm/llvm-project/issues/165288 -# XFAIL: !WARP && Clang && (DirectX || Metal) - -# RUN: split-file %s %t -# RUN: %dxc_target -T cs_6_0 -fspv-enable-maximal-reconvergence -Fo %t.o %t/source.hlsl -# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s - -# CHECK: Name: Out -# CHECK: Format: Float32 -# CHECK: Data: [ 14, 16, 14, 16, 14, 16, 14, 16 ]