@@ -726,12 +726,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
726726; GFX90A-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
727727; GFX90A-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
728728; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
729- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1 ], s[2:3], s[2:3] op_sel:[0,1]
730- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3 ], s[6:7], s[6:7] op_sel:[0,1]
729+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3 ], s[2:3], s[2:3] op_sel:[0,1]
730+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5 ], s[6:7], s[6:7] op_sel:[0,1]
731731; GFX90A-VGPR-NEXT: s_nop 1
732- ; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[4:5 ], v[0:1 ], v[2:3 ], 0
732+ ; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1 ], v[2:3 ], v[4:5 ], 0
733733; GFX90A-VGPR-NEXT: s_nop 3
734- ; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[0:1 ], v[2:3 ], v[4:5 ] cbsz:1 abid:2 blgp:3
734+ ; GFX90A-VGPR-NEXT: v_mfma_f64_4x4x4f64 v[0:1], v[2:3 ], v[4:5 ], v[0:1 ] cbsz:1 abid:2 blgp:3
735735; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, 0
736736; GFX90A-VGPR-NEXT: s_nop 7
737737; GFX90A-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -742,12 +742,12 @@ define amdgpu_kernel void @test_mfma_f64_4x4x4f64(ptr addrspace(1) %arg, double
742742; GFX942-VGPR-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
743743; GFX942-VGPR-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
744744; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
745- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1 ], s[2:3]
746- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3 ], s[6:7]
745+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3 ], s[2:3]
746+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5 ], s[6:7]
747747; GFX942-VGPR-NEXT: s_nop 1
748- ; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[4:5 ], v[0:1 ], v[2:3 ], 0
748+ ; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1 ], v[2:3 ], v[4:5 ], 0
749749; GFX942-VGPR-NEXT: s_nop 3
750- ; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[0:1 ], v[2:3 ], v[4:5 ] cbsz:1 abid:2 neg:[1,1,0]
750+ ; GFX942-VGPR-NEXT: v_mfma_f64_4x4x4_4b_f64 v[0:1], v[2:3 ], v[4:5 ], v[0:1 ] cbsz:1 abid:2 neg:[1,1,0]
751751; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0
752752; GFX942-VGPR-NEXT: s_nop 7
753753; GFX942-VGPR-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
@@ -765,10 +765,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
765765; GFX90A-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
766766; GFX90A-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
767767; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
768- ; GFX90A-NEXT: v_mov_b32_e32 v2 , s10
768+ ; GFX90A-NEXT: v_mov_b32_e32 v0 , s10
769769; GFX90A-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
770- ; GFX90A-NEXT: v_mov_b32_e32 v3 , s11
771- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1 ], s[12:13], s[12:13] op_sel:[0,1]
770+ ; GFX90A-NEXT: v_mov_b32_e32 v1 , s11
771+ ; GFX90A-NEXT: v_pk_mov_b32 v[2:3 ], s[12:13], s[12:13] op_sel:[0,1]
772772; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
773773; GFX90A-NEXT: v_accvgpr_write_b32 a0, s0
774774; GFX90A-NEXT: v_accvgpr_write_b32 a1, s1
@@ -779,7 +779,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
779779; GFX90A-NEXT: v_accvgpr_write_b32 a6, s6
780780; GFX90A-NEXT: v_accvgpr_write_b32 a7, s7
781781; GFX90A-NEXT: s_nop 1
782- ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7] cbsz:1 abid:2 blgp:3
782+ ; GFX90A-NEXT: v_mfma_f64_16x16x4f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7] cbsz:1 abid:2 blgp:3
783783; GFX90A-NEXT: v_mov_b32_e32 v0, 0
784784; GFX90A-NEXT: s_nop 15
785785; GFX90A-NEXT: s_nop 0
@@ -792,10 +792,10 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
792792; GFX942-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
793793; GFX942-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
794794; GFX942-NEXT: s_waitcnt lgkmcnt(0)
795- ; GFX942-NEXT: v_mov_b32_e32 v2 , s10
795+ ; GFX942-NEXT: v_mov_b32_e32 v0 , s10
796796; GFX942-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
797- ; GFX942-NEXT: v_mov_b32_e32 v3 , s11
798- ; GFX942-NEXT: v_mov_b64_e32 v[0:1 ], s[12:13]
797+ ; GFX942-NEXT: v_mov_b32_e32 v1 , s11
798+ ; GFX942-NEXT: v_mov_b64_e32 v[2:3 ], s[12:13]
799799; GFX942-NEXT: s_waitcnt lgkmcnt(0)
800800; GFX942-NEXT: v_accvgpr_write_b32 a0, s0
801801; GFX942-NEXT: v_accvgpr_write_b32 a1, s1
@@ -806,7 +806,7 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
806806; GFX942-NEXT: v_accvgpr_write_b32 a6, s6
807807; GFX942-NEXT: v_accvgpr_write_b32 a7, s7
808808; GFX942-NEXT: s_nop 1
809- ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[2:3 ], v[0:1 ], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
809+ ; GFX942-NEXT: v_mfma_f64_16x16x4_f64 a[0:7], v[0:1 ], v[2:3 ], a[0:7] cbsz:1 abid:2 neg:[1,1,0]
810810; GFX942-NEXT: v_mov_b32_e32 v0, 0
811811; GFX942-NEXT: s_nop 15
812812; GFX942-NEXT: s_nop 0
@@ -819,17 +819,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
819819; GFX90A-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
820820; GFX90A-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
821821; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
822- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10 , s10
822+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8 , s10
823823; GFX90A-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
824- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11 , s11
825- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9 ], s[12:13], s[12:13] op_sel:[0,1]
824+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v9 , s11
825+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11 ], s[12:13], s[12:13] op_sel:[0,1]
826826; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
827827; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
828828; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
829829; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], s[4:5], s[4:5] op_sel:[0,1]
830830; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
831831; GFX90A-VGPR-NEXT: s_nop 1
832- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[10:11 ], v[8:9 ], v[0:7] cbsz:1 abid:2 blgp:3
832+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[0:7], v[8:9 ], v[10:11 ], v[0:7] cbsz:1 abid:2 blgp:3
833833; GFX90A-VGPR-NEXT: v_mov_b32_e32 v8, 0
834834; GFX90A-VGPR-NEXT: s_nop 15
835835; GFX90A-VGPR-NEXT: s_nop 0
@@ -842,17 +842,17 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64(ptr addrspace(1) %arg, doubl
842842; GFX942-VGPR-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
843843; GFX942-VGPR-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0x34
844844; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
845- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v10 , s10
845+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v8 , s10
846846; GFX942-VGPR-NEXT: s_load_dwordx8 s[0:7], s[8:9], 0x0
847- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v11 , s11
848- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9 ], s[12:13]
847+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v9 , s11
848+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11 ], s[12:13]
849849; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
850850; GFX942-VGPR-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
851851; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
852852; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], s[4:5]
853853; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], s[6:7]
854854; GFX942-VGPR-NEXT: s_nop 1
855- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[10:11 ], v[8:9 ], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
855+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[0:7], v[8:9 ], v[10:11 ], v[0:7] cbsz:1 abid:2 neg:[1,1,0]
856856; GFX942-VGPR-NEXT: v_mov_b32_e32 v8, 0
857857; GFX942-VGPR-NEXT: s_nop 15
858858; GFX942-VGPR-NEXT: s_nop 0
@@ -1629,20 +1629,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
16291629; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
16301630; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
16311631; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1632- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12 , s2
1633- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13 , s3
1632+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10 , s2
1633+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11 , s3
16341634; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v0
16351635; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
16361636; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v0
16371637; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
16381638; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, v0
16391639; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1640- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11 ], s[6:7], s[6:7] op_sel:[0,1]
1640+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13 ], s[6:7], s[6:7] op_sel:[0,1]
16411641; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
16421642; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
16431643; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
16441644; GFX90A-VGPR-NEXT: s_nop 1
1645- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13 ], v[10:11 ], v[2:9]
1645+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11 ], v[12:13 ], v[2:9]
16461646; GFX90A-VGPR-NEXT: s_nop 15
16471647; GFX90A-VGPR-NEXT: s_nop 1
16481648; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1657,20 +1657,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(ptr addrspace(1) %arg, d
16571657; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, 0x3ff00000
16581658; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
16591659; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1660- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v12 , s2
1661- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v13 , s3
1660+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v10 , s2
1661+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v11 , s3
16621662; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v0
16631663; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
16641664; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v0
16651665; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
16661666; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
16671667; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1668- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11 ], s[6:7]
1668+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13 ], s[6:7]
16691669; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
16701670; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
16711671; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
16721672; GFX942-VGPR-NEXT: s_nop 1
1673- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13 ], v[10:11 ], v[2:9]
1673+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11 ], v[12:13 ], v[2:9]
16741674; GFX942-VGPR-NEXT: s_nop 15
16751675; GFX942-VGPR-NEXT: s_nop 1
16761676; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1743,20 +1743,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
17431743; GFX90A-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
17441744; GFX90A-VGPR-NEXT: v_mov_b32_e32 v2, v0
17451745; GFX90A-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1746- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v12 , s2
1747- ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v13 , s3
1746+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v10 , s2
1747+ ; GFX90A-VGPR-NEXT: v_mov_b32_e32 v11 , s3
17481748; GFX90A-VGPR-NEXT: v_mov_b32_e32 v3, v1
17491749; GFX90A-VGPR-NEXT: v_mov_b32_e32 v4, v0
17501750; GFX90A-VGPR-NEXT: v_mov_b32_e32 v5, v1
17511751; GFX90A-VGPR-NEXT: v_mov_b32_e32 v6, v0
17521752; GFX90A-VGPR-NEXT: v_mov_b32_e32 v7, v1
17531753; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[8:9], v[6:7], v[6:7] op_sel:[0,1]
1754- ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[10:11 ], s[6:7], s[6:7] op_sel:[0,1]
1754+ ; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[12:13 ], s[6:7], s[6:7] op_sel:[0,1]
17551755; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
17561756; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
17571757; GFX90A-VGPR-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
17581758; GFX90A-VGPR-NEXT: s_nop 1
1759- ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[12:13 ], v[10:11 ], v[2:9]
1759+ ; GFX90A-VGPR-NEXT: v_mfma_f64_16x16x4f64 v[2:9], v[10:11 ], v[12:13 ], v[2:9]
17601760; GFX90A-VGPR-NEXT: s_nop 15
17611761; GFX90A-VGPR-NEXT: s_nop 1
17621762; GFX90A-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
@@ -1771,20 +1771,20 @@ define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(ptr addrspace(1) %
17711771; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 0x405ec000
17721772; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, v0
17731773; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
1774- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v12 , s2
1775- ; GFX942-VGPR-NEXT: v_mov_b32_e32 v13 , s3
1774+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v10 , s2
1775+ ; GFX942-VGPR-NEXT: v_mov_b32_e32 v11 , s3
17761776; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v1
17771777; GFX942-VGPR-NEXT: v_mov_b32_e32 v4, v0
17781778; GFX942-VGPR-NEXT: v_mov_b32_e32 v5, v1
17791779; GFX942-VGPR-NEXT: v_mov_b32_e32 v6, v0
17801780; GFX942-VGPR-NEXT: v_mov_b32_e32 v7, v1
17811781; GFX942-VGPR-NEXT: v_mov_b64_e32 v[8:9], v[6:7]
1782- ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[10:11 ], s[6:7]
1782+ ; GFX942-VGPR-NEXT: v_mov_b64_e32 v[12:13 ], s[6:7]
17831783; GFX942-VGPR-NEXT: v_mov_b64_e32 v[6:7], v[4:5]
17841784; GFX942-VGPR-NEXT: v_mov_b64_e32 v[4:5], v[2:3]
17851785; GFX942-VGPR-NEXT: v_mov_b64_e32 v[2:3], v[0:1]
17861786; GFX942-VGPR-NEXT: s_nop 1
1787- ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[12:13 ], v[10:11 ], v[2:9]
1787+ ; GFX942-VGPR-NEXT: v_mfma_f64_16x16x4_f64 v[2:9], v[10:11 ], v[12:13 ], v[2:9]
17881788; GFX942-VGPR-NEXT: s_nop 15
17891789; GFX942-VGPR-NEXT: s_nop 1
17901790; GFX942-VGPR-NEXT: global_store_dwordx4 v0, v[6:9], s[0:1] offset:16
0 commit comments