@@ -38,10 +38,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
3838; GFX10: ; %bb.0:
3939; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
4040; GFX10-NEXT: s_add_i32 s34, s4, -4
41- ; GFX10-NEXT: s_min_u32 s34, s4, s34
42- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
41+ ; GFX10-NEXT: s_min_u32 s36, s4, s34
4342; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
4443; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
44+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
4545; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
4646; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
4747; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -50,10 +50,10 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) {
5050; GFX11: ; %bb.0:
5151; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
5252; GFX11-NEXT: s_add_i32 s0, s4, -4
53- ; GFX11-NEXT: s_min_u32 s0, s4, s0
54- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
53+ ; GFX11-NEXT: s_min_u32 s2, s4, s0
5554; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
5655; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
56+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
5757; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
5858; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
5959; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -124,29 +124,29 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) {
124124;
125125; GFX10-LABEL: s_set_rounding_kernel:
126126; GFX10: ; %bb.0:
127- ; GFX10-NEXT: s_load_dword s0, s[4:5], 0x24
127+ ; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24
128+ ; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
129+ ; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
128130; GFX10-NEXT: ;;#ASMSTART
129131; GFX10-NEXT: ;;#ASMEND
130132; GFX10-NEXT: s_waitcnt lgkmcnt(0)
131- ; GFX10-NEXT: s_add_i32 s1, s0, -4
132- ; GFX10-NEXT: s_min_u32 s2, s0, s1
133- ; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f
134- ; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9
133+ ; GFX10-NEXT: s_add_i32 s3, s2, -4
134+ ; GFX10-NEXT: s_min_u32 s2, s2, s3
135135; GFX10-NEXT: s_lshl_b32 s2, s2, 2
136136; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
137137; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
138138; GFX10-NEXT: s_endpgm
139139;
140140; GFX11-LABEL: s_set_rounding_kernel:
141141; GFX11: ; %bb.0:
142- ; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x24
142+ ; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24
143+ ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
144+ ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
143145; GFX11-NEXT: ;;#ASMSTART
144146; GFX11-NEXT: ;;#ASMEND
145147; GFX11-NEXT: s_waitcnt lgkmcnt(0)
146- ; GFX11-NEXT: s_add_i32 s1, s0, -4
147- ; GFX11-NEXT: s_min_u32 s2, s0, s1
148- ; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
149- ; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
148+ ; GFX11-NEXT: s_add_i32 s3, s2, -4
149+ ; GFX11-NEXT: s_min_u32 s2, s2, s3
150150; GFX11-NEXT: s_lshl_b32 s2, s2, 2
151151; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
152152; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
@@ -294,10 +294,10 @@ define void @set_rounding_get_rounding() {
294294; GFX10-NEXT: s_cmp_lt_u32 s4, 4
295295; GFX10-NEXT: s_cselect_b32 s4, s4, s5
296296; GFX10-NEXT: s_add_i32 s5, s4, -4
297- ; GFX10-NEXT: s_min_u32 s4, s4, s5
298- ; GFX10-NEXT: s_lshl_b32 s6, s4, 2
297+ ; GFX10-NEXT: s_min_u32 s6, s4, s5
299298; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f
300299; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9
300+ ; GFX10-NEXT: s_lshl_b32 s6, s6, 2
301301; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6
302302; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4
303303; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -315,10 +315,10 @@ define void @set_rounding_get_rounding() {
315315; GFX11-NEXT: s_cmp_lt_u32 s0, 4
316316; GFX11-NEXT: s_cselect_b32 s0, s0, s1
317317; GFX11-NEXT: s_add_i32 s1, s0, -4
318- ; GFX11-NEXT: s_min_u32 s0, s0, s1
319- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
318+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
320319; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
321320; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
321+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
322322; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
323323; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
324324; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -974,10 +974,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
974974; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
975975; GFX10-NEXT: s_sext_i32_i16 s34, s4
976976; GFX10-NEXT: s_add_i32 s35, s34, -4
977- ; GFX10-NEXT: s_min_u32 s34, s34, s35
978- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
977+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
979978; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
980979; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
980+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
981981; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
982982; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
983983; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -987,10 +987,10 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) {
987987; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
988988; GFX11-NEXT: s_sext_i32_i16 s0, s4
989989; GFX11-NEXT: s_add_i32 s1, s0, -4
990- ; GFX11-NEXT: s_min_u32 s0, s0, s1
991- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
990+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
992991; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
993992; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
993+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
994994; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
995995; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
996996; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1055,10 +1055,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
10551055; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10561056; GFX10-NEXT: s_sext_i32_i16 s34, s4
10571057; GFX10-NEXT: s_add_i32 s35, s34, -4
1058- ; GFX10-NEXT: s_min_u32 s34, s34, s35
1059- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1058+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
10601059; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
10611060; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1061+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
10621062; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
10631063; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
10641064; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1068,10 +1068,10 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) {
10681068; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10691069; GFX11-NEXT: s_sext_i32_i16 s0, s4
10701070; GFX11-NEXT: s_add_i32 s1, s0, -4
1071- ; GFX11-NEXT: s_min_u32 s0, s0, s1
1072- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1071+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
10731072; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
10741073; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1074+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
10751075; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
10761076; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
10771077; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1136,10 +1136,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
11361136; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11371137; GFX10-NEXT: s_and_b32 s34, 0xffff, s4
11381138; GFX10-NEXT: s_add_i32 s35, s34, -4
1139- ; GFX10-NEXT: s_min_u32 s34, s34, s35
1140- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1139+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
11411140; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
11421141; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1142+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
11431143; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
11441144; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
11451145; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1149,10 +1149,10 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) {
11491149; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11501150; GFX11-NEXT: s_and_b32 s0, 0xffff, s4
11511151; GFX11-NEXT: s_add_i32 s1, s0, -4
1152- ; GFX11-NEXT: s_min_u32 s0, s0, s1
1153- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1152+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
11541153; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
11551154; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1155+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
11561156; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
11571157; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
11581158; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1569,10 +1569,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
15691569; GFX10-NEXT: v_readfirstlane_b32 s34, v0
15701570; GFX10-NEXT: s_lshl_b32 s34, s34, 2
15711571; GFX10-NEXT: s_add_i32 s35, s34, -4
1572- ; GFX10-NEXT: s_min_u32 s34, s34, s35
1573- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1572+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
15741573; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
15751574; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1575+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
15761576; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
15771577; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
15781578; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1586,10 +1586,10 @@ define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) {
15861586; GFX11-NEXT: v_readfirstlane_b32 s0, v0
15871587; GFX11-NEXT: s_lshl_b32 s0, s0, 2
15881588; GFX11-NEXT: s_add_i32 s1, s0, -4
1589- ; GFX11-NEXT: s_min_u32 s0, s0, s1
1590- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1589+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
15911590; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
15921591; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1592+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
15931593; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
15941594; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
15951595; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1634,10 +1634,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
16341634; GFX10-NEXT: s_cmp_eq_u32 s4, 0
16351635; GFX10-NEXT: s_cselect_b32 s34, 3, 5
16361636; GFX10-NEXT: s_add_i32 s35, s34, -4
1637- ; GFX10-NEXT: s_min_u32 s34, s34, s35
1638- ; GFX10-NEXT: s_lshl_b32 s36, s34, 2
1637+ ; GFX10-NEXT: s_min_u32 s36, s34, s35
16391638; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f
16401639; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9
1640+ ; GFX10-NEXT: s_lshl_b32 s36, s36, 2
16411641; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36
16421642; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34
16431643; GFX10-NEXT: s_setpc_b64 s[30:31]
@@ -1648,10 +1648,10 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) {
16481648; GFX11-NEXT: s_cmp_eq_u32 s4, 0
16491649; GFX11-NEXT: s_cselect_b32 s0, 3, 5
16501650; GFX11-NEXT: s_add_i32 s1, s0, -4
1651- ; GFX11-NEXT: s_min_u32 s0, s0, s1
1652- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
1651+ ; GFX11-NEXT: s_min_u32 s2, s0, s1
16531652; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f
16541653; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9
1654+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
16551655; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
16561656; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0
16571657; GFX11-NEXT: s_setpc_b64 s[30:31]
@@ -1747,13 +1747,13 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
17471747; GFX10-LABEL: get_rounding_after_set_rounding_1:
17481748; GFX10: ; %bb.0:
17491749; GFX10-NEXT: s_round_mode 0x0
1750- ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1751- ; GFX10-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1752- ; GFX10-NEXT: v_mov_b32_e32 v1, 0
1753- ; GFX10-NEXT: s_lshl_b32 s2, s0, 2
17541750; GFX10-NEXT: s_mov_b32 s0, 0xeb24da71
1751+ ; GFX10-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
17551752; GFX10-NEXT: s_mov_b32 s1, 0xc96f385
1753+ ; GFX10-NEXT: s_lshl_b32 s2, s2, 2
1754+ ; GFX10-NEXT: v_mov_b32_e32 v0, 0
17561755; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
1756+ ; GFX10-NEXT: v_mov_b32_e32 v1, 0
17571757; GFX10-NEXT: s_and_b32 s0, s0, 15
17581758; GFX10-NEXT: s_add_i32 s1, s0, 4
17591759; GFX10-NEXT: s_cmp_lt_u32 s0, 4
@@ -1766,11 +1766,11 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() {
17661766; GFX11-LABEL: get_rounding_after_set_rounding_1:
17671767; GFX11: ; %bb.0:
17681768; GFX11-NEXT: s_round_mode 0x0
1769- ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1770- ; GFX11-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4)
1771- ; GFX11-NEXT: s_lshl_b32 s2, s0, 2
17721769; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71
1770+ ; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4)
17731771; GFX11-NEXT: s_mov_b32 s1, 0xc96f385
1772+ ; GFX11-NEXT: s_lshl_b32 s2, s2, 2
1773+ ; GFX11-NEXT: v_mov_b32_e32 v0, 0
17741774; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2
17751775; GFX11-NEXT: s_and_b32 s0, s0, 15
17761776; GFX11-NEXT: s_add_i32 s1, s0, 4
0 commit comments