From b6636d394a9f2ecb10a3092e07b730d08c88aea3 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Fri, 31 Oct 2025 17:11:21 -0700 Subject: [PATCH] Fix OOB in subscript indexing of col-major matrix in cbuffer A bug in HLOperationLower code that lowers the matrix subscript operation when the matrix is in a cbuffer could cause out-of-bounds alloca indexing. The indexing creates an alloca to store a column-vector loaded from one cbuffer vector. This should have one element per row of the matrix, so it can then index along that dimension using the local array. The bug was using the number of columns when sizing the alloca instead of the number of rows. This caused it to write to out-of-bounds index when the number of rows exceeded the number of columns (like for float3x2). --- lib/HLSL/HLOperationLower.cpp | 2 +- .../cb-col-matrix-subscript-dxilgen.ll | 158 ++++++++++++++++++ 2 files changed, 159 insertions(+), 1 deletion(-) create mode 100644 tools/clang/test/DXC/Passes/DxilGen/cb-col-matrix-subscript-dxilgen.ll diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index c9c9bb4c88..b0ba051875 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -8189,7 +8189,7 @@ void TranslateCBAddressUserLegacy(Instruction *user, Value *handle, // row.z = c[2].[idx] // row.w = c[3].[idx] Value *Elts[4]; - ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumColumns()); + ArrayType *AT = ArrayType::get(EltTy, MatTy.getNumRows()); IRBuilder<> AllocaBuilder(user->getParent() ->getParent() diff --git a/tools/clang/test/DXC/Passes/DxilGen/cb-col-matrix-subscript-dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/cb-col-matrix-subscript-dxilgen.ll new file mode 100644 index 0000000000..14d8d93b1f --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/cb-col-matrix-subscript-dxilgen.ll @@ -0,0 +1,158 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Source HLSL: +; // dxc -T cs_6_0 +; float3x2 M; +; RWStructuredBuffer SB; +; [numthreads(3,1,1)] +; void main(uint row : SV_GroupIndex) { +; SB[row] = M[row]; +; } + +; Check cbuffer column-major matrix subscript codegen uses correct alloca size. + +; Column-major matrix subscript is lowered in HLOperationLower by: +; Loading one column into an alloca, dynamically indexing that by row, +; then doing the same for the next column. +; Each column is 3 floats for 3 rows. Thus alloca size should be 3 floats. + +; alloca size should be number of rows (3), not number of columns (2) +; CHECK: %[[alloca:[^ ]+]] = alloca [3 x float] +; CHECK: %[[row:[^ ]+]] = call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96) +; CHECK: %[[cb0:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[cbh:[^ ]+]], i32 0) +; CHECK: %[[_cb_ex0:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 0 +; CHECK: %[[_v3x:[^ ]+]] = insertelement <3 x float> undef, float %[[_cb_ex0]], i64 0 +; CHECK: %[[_cb_ex1:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 1 +; CHECK: %[[_v3xy:[^ ]+]] = insertelement <3 x float> %[[_v3x]], float %[[_cb_ex1]], i64 1 +; CHECK: %[[_cb_ex2:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb0]], 2 +; CHECK: %[[_v3xyz:[^ ]+]] = insertelement <3 x float> %[[_v3xy]], float %[[_cb_ex2]], i64 2 +; CHECK: %[[cb0_x:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 0 +; CHECK: %[[_alloca_0:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 0 +; CHECK: store float %[[cb0_x]], float* %[[_alloca_0]] +; CHECK: %[[cb0_y:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 1 +; CHECK: %[[_alloca_1:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 1 +; CHECK: store float %[[cb0_y]], float* %[[_alloca_1]] +; CHECK: %[[cb0_z:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 2 +; CHECK: %[[_alloca_2:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 2 +; CHECK: store float %[[cb0_z]], float* %[[_alloca_2]] +; CHECK: %[[_alloca_row:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 %[[row]] +; CHECK: %[[M_row_0:[^ ]+]] = load float, float* %[[_alloca_row]] +; CHECK: %[[cb1:[^ ]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[cbh]], i32 1) +; CHECK: %[[_cb_ex0:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 0 +; CHECK: %[[_v3x:[^ ]+]] = insertelement <3 x float> undef, float %[[_cb_ex0]], i64 0 +; CHECK: %[[_cb_ex1:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 1 +; CHECK: %[[_v3xy:[^ ]+]] = insertelement <3 x float> %[[_v3x]], float %[[_cb_ex1]], i64 1 +; CHECK: %[[_cb_ex2:[^ ]+]] = extractvalue %dx.types.CBufRet.f32 %[[cb1]], 2 +; CHECK: %[[_v3xyz:[^ ]+]] = insertelement <3 x float> %[[_v3xy]], float %[[_cb_ex2]], i64 2 +; CHECK: %[[cb1_x:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 0 +; CHECK: %[[_alloca_0:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 0 +; CHECK: store float %[[cb1_x]], float* %[[_alloca_0]] +; CHECK: %[[cb1_y:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 1 +; CHECK: %[[_alloca_1:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 1 +; CHECK: store float %[[cb1_y]], float* %[[_alloca_1]] +; CHECK: %[[cb1_z:[^ ]+]] = extractelement <3 x float> %[[_v3xyz]], i32 2 +; CHECK: %[[_alloca_2:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 2 +; CHECK: store float %[[cb1_z]], float* %[[_alloca_2]] +; CHECK: %[[_alloca_row:[^ ]+]] = getelementptr inbounds [3 x float], [3 x float]* %[[alloca]], i32 0, i32 %[[row]] +; CHECK: %[[M_row_1:[^ ]+]] = load float, float* %[[_alloca_row]] +; CHECK: %[[M_row_v2x:[^ ]+]] = insertelement <2 x float> undef, float %[[M_row_0]], i64 0 +; CHECK: %[[M_row_v2xy:[^ ]+]] = insertelement <2 x float> %[[M_row_v2x]], float %[[M_row_1]], i64 1 +; CHECK: %[[ex_M_row_0:[^ ]+]] = extractelement <2 x float> %[[M_row_v2xy]], i64 0 +; CHECK: %[[ex_M_row_1:[^ ]+]] = extractelement <2 x float> %[[M_row_v2xy]], i64 1 +; CHECK: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %{{[^,]+}}, i32 %[[row]], i32 0, float %[[ex_M_row_0]], float %[[ex_M_row_1]], float undef, float undef, i8 3, i32 4) + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%"class.RWStructuredBuffer >" = type { <2 x float> } +%"$Globals" = type { %class.matrix.float.3.2 } +%class.matrix.float.3.2 = type { [3 x <2 x float>] } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?SB@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" = external global %"class.RWStructuredBuffer >", align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define void @main(i32 %row) #0 { +entry: + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 28 }, %"$Globals" undef) + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0 + %4 = add i32 3, %row + %5 = call <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.3.2*, i32, i32)"(i32 1, %class.matrix.float.3.2* %3, i32 %row, i32 %4) + %6 = load <2 x float>, <2 x float>* %5 + %7 = load %"class.RWStructuredBuffer >", %"class.RWStructuredBuffer >"* @"\01?SB@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A" + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32 0, %"class.RWStructuredBuffer >" %7) + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 4108, i32 8 }, %"class.RWStructuredBuffer >" zeroinitializer) + %10 = call <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32 0, %dx.types.Handle %9, i32 %row) + store <2 x float> %6, <2 x float>* %10 + ret void +} + +; Function Attrs: nounwind readnone +declare <2 x float>* @"dx.hl.subscript.colMajor[].rn.<2 x float>* (i32, %class.matrix.float.3.2*, i32, i32)"(i32, %class.matrix.float.3.2*, i32, i32) #1 + +; Function Attrs: nounwind readnone +declare <2 x float>* @"dx.hl.subscript.[].rn.<2 x float>* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22class.RWStructuredBuffer >\22)"(i32, %"class.RWStructuredBuffer >") #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22class.RWStructuredBuffer >\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"class.RWStructuredBuffer >") #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!pauseresume = !{!0} +!llvm.ident = !{!1} +!dx.fnprops = !{!2} +!dx.options = !{!3, !4} +!dx.version = !{!5} +!dx.valver = !{!6} +!dx.shaderModel = !{!7} +!dx.resources = !{!8} +!dx.typeAnnotations = !{!14, !20} +!dx.entryPoints = !{!27} + +!0 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!1 = !{!"dxc(private) 1.8.0.15080 (main, c52246122)"} +!2 = !{void (i32)* @main, i32 5, i32 3, i32 1, i32 1} +!3 = !{i32 -2147483584} +!4 = !{i32 -1} +!5 = !{i32 1, i32 0} +!6 = !{i32 1, i32 10} +!7 = !{!"cs", i32 6, i32 0} +!8 = !{null, !9, !12, null} +!9 = !{!10} +!10 = !{i32 0, %"class.RWStructuredBuffer >"* @"\01?SB@@3V?$RWStructuredBuffer@V?$vector@M$01@@@@A", !"SB", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 false, i1 false, !11} +!11 = !{i32 1, i32 8} +!12 = !{!13} +!13 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 28, null} +!14 = !{i32 0, %"class.RWStructuredBuffer >" undef, !15, %"$Globals" undef, !17} +!15 = !{i32 8, !16} +!16 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!17 = !{i32 28, !18} +!18 = !{i32 6, !"M", i32 2, !19, i32 3, i32 0, i32 7, i32 9} +!19 = !{i32 3, i32 2, i32 2} +!20 = !{i32 1, void (i32)* @main, !21} +!21 = !{!22, !24} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{i32 0, !25, !26} +!25 = !{i32 4, !"SV_GroupIndex", i32 7, i32 5} +!26 = !{i32 0} +!27 = !{void (i32)* @main, !"main", null, !8, !28} +!28 = !{i32 4, !29} +!29 = !{i32 -858993460, i32 -858993460, i32 -858993460}