@@ -167,25 +167,25 @@ define void @generic_4xi8(ptr %a) {
167167; CHECK-NEXT: // %bb.0:
168168; CHECK-NEXT: ld.param.u64 %rd1, [generic_4xi8_param_0];
169169; CHECK-NEXT: ld.u32 %r1, [%rd1];
170- ; CHECK-NEXT: bfe.u32 %r2, %r1, 0 , 8;
170+ ; CHECK-NEXT: bfe.u32 %r2, %r1, 24 , 8;
171171; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
172172; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
173173; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
174- ; CHECK-NEXT: bfe.u32 %r4, %r1, 8 , 8;
174+ ; CHECK-NEXT: bfe.u32 %r4, %r1, 16 , 8;
175175; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
176176; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
177177; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
178- ; CHECK-NEXT: bfi .b32 %r6, %r5, %r3, 8, 8 ;
179- ; CHECK-NEXT: bfe.u32 %r7, %r1, 16 , 8;
178+ ; CHECK-NEXT: prmt .b32 %r6, %r5, %r3, 13120 ;
179+ ; CHECK-NEXT: bfe.u32 %r7, %r1, 8 , 8;
180180; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
181181; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
182182; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
183- ; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
184- ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
185- ; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
183+ ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
184+ ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
186185; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
187- ; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
188- ; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
186+ ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
187+ ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
188+ ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
189189; CHECK-NEXT: st.u32 [%rd1], %r12;
190190; CHECK-NEXT: ret;
191191 %a.load = load <4 x i8 >, ptr %a
@@ -511,25 +511,25 @@ define void @generic_volatile_4xi8(ptr %a) {
511511; CHECK-NEXT: // %bb.0:
512512; CHECK-NEXT: ld.param.u64 %rd1, [generic_volatile_4xi8_param_0];
513513; CHECK-NEXT: ld.volatile.u32 %r1, [%rd1];
514- ; CHECK-NEXT: bfe.u32 %r2, %r1, 0 , 8;
514+ ; CHECK-NEXT: bfe.u32 %r2, %r1, 24 , 8;
515515; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
516516; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
517517; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
518- ; CHECK-NEXT: bfe.u32 %r4, %r1, 8 , 8;
518+ ; CHECK-NEXT: bfe.u32 %r4, %r1, 16 , 8;
519519; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
520520; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
521521; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
522- ; CHECK-NEXT: bfi .b32 %r6, %r5, %r3, 8, 8 ;
523- ; CHECK-NEXT: bfe.u32 %r7, %r1, 16 , 8;
522+ ; CHECK-NEXT: prmt .b32 %r6, %r5, %r3, 13120 ;
523+ ; CHECK-NEXT: bfe.u32 %r7, %r1, 8 , 8;
524524; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
525525; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
526526; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
527- ; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
528- ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
529- ; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
527+ ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
528+ ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
530529; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
531- ; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
532- ; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
530+ ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
531+ ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
532+ ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
533533; CHECK-NEXT: st.volatile.u32 [%rd1], %r12;
534534; CHECK-NEXT: ret;
535535 %a.load = load volatile <4 x i8 >, ptr %a
@@ -1416,25 +1416,25 @@ define void @global_4xi8(ptr addrspace(1) %a) {
14161416; CHECK-NEXT: // %bb.0:
14171417; CHECK-NEXT: ld.param.u64 %rd1, [global_4xi8_param_0];
14181418; CHECK-NEXT: ld.global.u32 %r1, [%rd1];
1419- ; CHECK-NEXT: bfe.u32 %r2, %r1, 0 , 8;
1419+ ; CHECK-NEXT: bfe.u32 %r2, %r1, 24 , 8;
14201420; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
14211421; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
14221422; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
1423- ; CHECK-NEXT: bfe.u32 %r4, %r1, 8 , 8;
1423+ ; CHECK-NEXT: bfe.u32 %r4, %r1, 16 , 8;
14241424; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
14251425; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
14261426; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
1427- ; CHECK-NEXT: bfi .b32 %r6, %r5, %r3, 8, 8 ;
1428- ; CHECK-NEXT: bfe.u32 %r7, %r1, 16 , 8;
1427+ ; CHECK-NEXT: prmt .b32 %r6, %r5, %r3, 13120 ;
1428+ ; CHECK-NEXT: bfe.u32 %r7, %r1, 8 , 8;
14291429; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
14301430; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
14311431; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
1432- ; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
1433- ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
1434- ; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
1432+ ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
1433+ ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
14351434; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
1436- ; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
1437- ; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
1435+ ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
1436+ ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
1437+ ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
14381438; CHECK-NEXT: st.global.u32 [%rd1], %r12;
14391439; CHECK-NEXT: ret;
14401440 %a.load = load <4 x i8 >, ptr addrspace (1 ) %a
@@ -1741,25 +1741,25 @@ define void @global_volatile_4xi8(ptr addrspace(1) %a) {
17411741; CHECK-NEXT: // %bb.0:
17421742; CHECK-NEXT: ld.param.u64 %rd1, [global_volatile_4xi8_param_0];
17431743; CHECK-NEXT: ld.volatile.global.u32 %r1, [%rd1];
1744- ; CHECK-NEXT: bfe.u32 %r2, %r1, 0 , 8;
1744+ ; CHECK-NEXT: bfe.u32 %r2, %r1, 24 , 8;
17451745; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
17461746; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
17471747; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
1748- ; CHECK-NEXT: bfe.u32 %r4, %r1, 8 , 8;
1748+ ; CHECK-NEXT: bfe.u32 %r4, %r1, 16 , 8;
17491749; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
17501750; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
17511751; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
1752- ; CHECK-NEXT: bfi .b32 %r6, %r5, %r3, 8, 8 ;
1753- ; CHECK-NEXT: bfe.u32 %r7, %r1, 16 , 8;
1752+ ; CHECK-NEXT: prmt .b32 %r6, %r5, %r3, 13120 ;
1753+ ; CHECK-NEXT: bfe.u32 %r7, %r1, 8 , 8;
17541754; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
17551755; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
17561756; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
1757- ; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
1758- ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
1759- ; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
1757+ ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
1758+ ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
17601759; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
1761- ; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
1762- ; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
1760+ ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
1761+ ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
1762+ ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
17631763; CHECK-NEXT: st.volatile.global.u32 [%rd1], %r12;
17641764; CHECK-NEXT: ret;
17651765 %a.load = load volatile <4 x i8 >, ptr addrspace (1 ) %a
@@ -2788,25 +2788,25 @@ define void @shared_4xi8(ptr addrspace(3) %a) {
27882788; CHECK-NEXT: // %bb.0:
27892789; CHECK-NEXT: ld.param.u64 %rd1, [shared_4xi8_param_0];
27902790; CHECK-NEXT: ld.shared.u32 %r1, [%rd1];
2791- ; CHECK-NEXT: bfe.u32 %r2, %r1, 0 , 8;
2791+ ; CHECK-NEXT: bfe.u32 %r2, %r1, 24 , 8;
27922792; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
27932793; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
27942794; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
2795- ; CHECK-NEXT: bfe.u32 %r4, %r1, 8 , 8;
2795+ ; CHECK-NEXT: bfe.u32 %r4, %r1, 16 , 8;
27962796; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
27972797; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
27982798; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
2799- ; CHECK-NEXT: bfi .b32 %r6, %r5, %r3, 8, 8 ;
2800- ; CHECK-NEXT: bfe.u32 %r7, %r1, 16 , 8;
2799+ ; CHECK-NEXT: prmt .b32 %r6, %r5, %r3, 13120 ;
2800+ ; CHECK-NEXT: bfe.u32 %r7, %r1, 8 , 8;
28012801; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
28022802; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
28032803; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
2804- ; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
2805- ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
2806- ; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
2804+ ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
2805+ ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
28072806; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
2808- ; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
2809- ; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
2807+ ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
2808+ ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
2809+ ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
28102810; CHECK-NEXT: st.shared.u32 [%rd1], %r12;
28112811; CHECK-NEXT: ret;
28122812 %a.load = load <4 x i8 >, ptr addrspace (3 ) %a
@@ -3113,25 +3113,25 @@ define void @shared_volatile_4xi8(ptr addrspace(3) %a) {
31133113; CHECK-NEXT: // %bb.0:
31143114; CHECK-NEXT: ld.param.u64 %rd1, [shared_volatile_4xi8_param_0];
31153115; CHECK-NEXT: ld.volatile.shared.u32 %r1, [%rd1];
3116- ; CHECK-NEXT: bfe.u32 %r2, %r1, 0 , 8;
3116+ ; CHECK-NEXT: bfe.u32 %r2, %r1, 24 , 8;
31173117; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
31183118; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
31193119; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
3120- ; CHECK-NEXT: bfe.u32 %r4, %r1, 8 , 8;
3120+ ; CHECK-NEXT: bfe.u32 %r4, %r1, 16 , 8;
31213121; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
31223122; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
31233123; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
3124- ; CHECK-NEXT: bfi .b32 %r6, %r5, %r3, 8, 8 ;
3125- ; CHECK-NEXT: bfe.u32 %r7, %r1, 16 , 8;
3124+ ; CHECK-NEXT: prmt .b32 %r6, %r5, %r3, 13120 ;
3125+ ; CHECK-NEXT: bfe.u32 %r7, %r1, 8 , 8;
31263126; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
31273127; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
31283128; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
3129- ; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
3130- ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
3131- ; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
3129+ ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
3130+ ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
31323131; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
3133- ; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
3134- ; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
3132+ ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
3133+ ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
3134+ ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
31353135; CHECK-NEXT: st.volatile.shared.u32 [%rd1], %r12;
31363136; CHECK-NEXT: ret;
31373137 %a.load = load volatile <4 x i8 >, ptr addrspace (3 ) %a
@@ -4018,25 +4018,25 @@ define void @local_4xi8(ptr addrspace(5) %a) {
40184018; CHECK-NEXT: // %bb.0:
40194019; CHECK-NEXT: ld.param.u64 %rd1, [local_4xi8_param_0];
40204020; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
4021- ; CHECK-NEXT: bfe.u32 %r2, %r1, 0 , 8;
4021+ ; CHECK-NEXT: bfe.u32 %r2, %r1, 24 , 8;
40224022; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
40234023; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
40244024; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
4025- ; CHECK-NEXT: bfe.u32 %r4, %r1, 8 , 8;
4025+ ; CHECK-NEXT: bfe.u32 %r4, %r1, 16 , 8;
40264026; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
40274027; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
40284028; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
4029- ; CHECK-NEXT: bfi .b32 %r6, %r5, %r3, 8, 8 ;
4030- ; CHECK-NEXT: bfe.u32 %r7, %r1, 16 , 8;
4029+ ; CHECK-NEXT: prmt .b32 %r6, %r5, %r3, 13120 ;
4030+ ; CHECK-NEXT: bfe.u32 %r7, %r1, 8 , 8;
40314031; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
40324032; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
40334033; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
4034- ; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
4035- ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
4036- ; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
4034+ ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
4035+ ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
40374036; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
4038- ; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
4039- ; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
4037+ ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
4038+ ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
4039+ ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
40404040; CHECK-NEXT: st.local.u32 [%rd1], %r12;
40414041; CHECK-NEXT: ret;
40424042 %a.load = load <4 x i8 >, ptr addrspace (5 ) %a
@@ -4343,25 +4343,25 @@ define void @local_volatile_4xi8(ptr addrspace(5) %a) {
43434343; CHECK-NEXT: // %bb.0:
43444344; CHECK-NEXT: ld.param.u64 %rd1, [local_volatile_4xi8_param_0];
43454345; CHECK-NEXT: ld.local.u32 %r1, [%rd1];
4346- ; CHECK-NEXT: bfe.u32 %r2, %r1, 0 , 8;
4346+ ; CHECK-NEXT: bfe.u32 %r2, %r1, 24 , 8;
43474347; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
43484348; CHECK-NEXT: add.s16 %rs2, %rs1, 1;
43494349; CHECK-NEXT: cvt.u32.u16 %r3, %rs2;
4350- ; CHECK-NEXT: bfe.u32 %r4, %r1, 8 , 8;
4350+ ; CHECK-NEXT: bfe.u32 %r4, %r1, 16 , 8;
43514351; CHECK-NEXT: cvt.u16.u32 %rs3, %r4;
43524352; CHECK-NEXT: add.s16 %rs4, %rs3, 1;
43534353; CHECK-NEXT: cvt.u32.u16 %r5, %rs4;
4354- ; CHECK-NEXT: bfi .b32 %r6, %r5, %r3, 8, 8 ;
4355- ; CHECK-NEXT: bfe.u32 %r7, %r1, 16 , 8;
4354+ ; CHECK-NEXT: prmt .b32 %r6, %r5, %r3, 13120 ;
4355+ ; CHECK-NEXT: bfe.u32 %r7, %r1, 8 , 8;
43564356; CHECK-NEXT: cvt.u16.u32 %rs5, %r7;
43574357; CHECK-NEXT: add.s16 %rs6, %rs5, 1;
43584358; CHECK-NEXT: cvt.u32.u16 %r8, %rs6;
4359- ; CHECK-NEXT: bfi.b32 %r9, %r8, %r6, 16, 8;
4360- ; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
4361- ; CHECK-NEXT: cvt.u16.u32 %rs7, %r10;
4359+ ; CHECK-NEXT: bfe.u32 %r9, %r1, 0, 8;
4360+ ; CHECK-NEXT: cvt.u16.u32 %rs7, %r9;
43624361; CHECK-NEXT: add.s16 %rs8, %rs7, 1;
4363- ; CHECK-NEXT: cvt.u32.u16 %r11, %rs8;
4364- ; CHECK-NEXT: bfi.b32 %r12, %r11, %r9, 24, 8;
4362+ ; CHECK-NEXT: cvt.u32.u16 %r10, %rs8;
4363+ ; CHECK-NEXT: prmt.b32 %r11, %r10, %r8, 13120;
4364+ ; CHECK-NEXT: prmt.b32 %r12, %r11, %r6, 21520;
43654365; CHECK-NEXT: st.local.u32 [%rd1], %r12;
43664366; CHECK-NEXT: ret;
43674367 %a.load = load volatile <4 x i8 >, ptr addrspace (5 ) %a
0 commit comments