11#--- source.hlsl
2- RWStructuredBuffer<uint > Out : register(u0);
2+ RWStructuredBuffer<float > Out : register(u0);
33
44[numthreads(8,1,1)]
55void main(uint3 TID : SV_GroupThreadID) {
6+ float maxActiveLaneCount = min(WaveGetLaneCount(), 8);
7+ float div1 = 1.0/maxActiveLaneCount;
8+ float div2 = 2.0/maxActiveLaneCount;
9+ float div4 = 4.0/maxActiveLaneCount;
10+ float div8 = 8.0/maxActiveLaneCount;
11+
12+ float result = 0;
13+
614 // First non-uniform branch
7- if (TID.x < 4 ) {
15+ if (TID.x % 2 == 0 ) {
816 // Second non-uniform branch
9- if (TID.x % 2 == 0) {
10- Out[TID.x] = WaveActiveSum(TID.x );
17+ if (TID.x % 4 == 0) {
18+ result += WaveActiveSum(div4 );
1119 } else {
12- Out[TID.x] = WaveActiveMax(TID.x );
20+ result += WaveActiveSum(div8 );
1321 }
1422 // Must reconverge here with maximal reconvergence
15- Out[TID.x] += WaveActiveMax(TID.x);
16- } else {
17- Out[4] = WaveActiveMax(TID.x);
23+ result += WaveActiveSum(div2);
1824 }
19- Out[TID.x] += WaveActiveMax(TID.x);
25+
26+ // Must reconverge here with maximal reconvergence
27+ result += WaveActiveSum(div1);
28+ Out[TID.x] = result;
2029}
2130
2231//--- pipeline.yaml
@@ -28,7 +37,7 @@ Shaders:
2837 DispatchSize: [1, 1, 1]
2938Buffers:
3039 - Name: Out
31- Format: UInt32
40+ Format: Float32
3241 Data: [ 0, 0, 0, 0, 0, 0, 0, 0 ]
3342DescriptorSets:
3443 - Resources:
@@ -44,16 +53,16 @@ DescriptorSets:
4453#--- end
4554# UNSUPPORTED: Vulkan && !VK_KHR_shader_maximal_reconvergence
4655
47- # BUG: https://github.com/llvm/offload-test-suite/issues/490
48- # XFAIL: WARP && DirectX
49-
5056# BUG: https://github.com/llvm/llvm-project/issues/165288
5157# XFAIL: !WARP && Clang && (DirectX || Metal)
5258
59+ # BUG: https://github.com/llvm/llvm-project/issues/99159
60+ # XFAIL: Clang
61+
5362# RUN: split-file %s %t
5463# RUN: %dxc_target -T cs_6_0 -fspv-enable-maximal-reconvergence -Fo %t.o %t/source.hlsl
5564# RUN: %offloader %t/pipeline.yaml %t.o | FileCheck %s
5665
5766# CHECK: Name: Out
58- # CHECK: Format: UInt32
59- # CHECK: Data: [ 12, 13, 12, 13, 14, 7, 7, 7 ]
67+ # CHECK: Format: Float32
68+ # CHECK: Data: [ 3, 1, 4, 1, 3, 1, 4, 1 ]
0 commit comments