@@ -155,8 +155,7 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
155155; X86-AVX512F-NEXT: movzbl {{[0-9]+}}(%esp), %eax
156156; X86-AVX512F-NEXT: kmovw %eax, %k1
157157; X86-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
158- ; X86-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
159- ; X86-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
158+ ; X86-AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
160159; X86-AVX512F-NEXT: retl
161160;
162161; X86-AVX512BW-LABEL: combine_vpermt2var_8f64_identity_mask:
@@ -165,26 +164,23 @@ define <8 x double> @combine_vpermt2var_8f64_identity_mask(<8 x double> %x0, <8
165164; X86-AVX512BW-NEXT: movzbl {{[0-9]+}}(%esp), %eax
166165; X86-AVX512BW-NEXT: kmovd %eax, %k1
167166; X86-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
168- ; X86-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,0,14,0,5,0,12,0,3,0,10,0,1,0,8,0]
169- ; X86-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
167+ ; X86-AVX512BW-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
170168; X86-AVX512BW-NEXT: retl
171169;
172170; X64-AVX512F-LABEL: combine_vpermt2var_8f64_identity_mask:
173171; X64-AVX512F: # %bb.0:
174172; X64-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
175173; X64-AVX512F-NEXT: kmovw %edi, %k1
176174; X64-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
177- ; X64-AVX512F-NEXT: vmovapd {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
178- ; X64-AVX512F-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
175+ ; X64-AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
179176; X64-AVX512F-NEXT: retq
180177;
181178; X64-AVX512BW-LABEL: combine_vpermt2var_8f64_identity_mask:
182179; X64-AVX512BW: # %bb.0:
183180; X64-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
184181; X64-AVX512BW-NEXT: kmovd %edi, %k1
185182; X64-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
186- ; X64-AVX512BW-NEXT: vmovapd {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
187- ; X64-AVX512BW-NEXT: vpermt2pd %zmm0, %zmm1, %zmm0 {%k1} {z}
183+ ; X64-AVX512BW-NEXT: vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z}
188184; X64-AVX512BW-NEXT: retq
189185 %res0 = call <8 x double > @llvm.x86.avx512.maskz.vpermt2var.pd.512 (<8 x i64 > <i64 7 , i64 6 , i64 5 , i64 4 , i64 3 , i64 2 , i64 1 , i64 0 >, <8 x double > %x0 , <8 x double > %x1 , i8 %m )
190186 %res1 = call <8 x double > @llvm.x86.avx512.maskz.vpermt2var.pd.512 (<8 x i64 > <i64 7 , i64 14 , i64 5 , i64 12 , i64 3 , i64 10 , i64 1 , i64 8 >, <8 x double > %res0 , <8 x double > %res0 , i8 %m )
@@ -259,8 +255,7 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
259255; X86-AVX512F-NEXT: movzbl {{[0-9]+}}(%esp), %eax
260256; X86-AVX512F-NEXT: kmovw %eax, %k1
261257; X86-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
262- ; X86-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
263- ; X86-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
258+ ; X86-AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
264259; X86-AVX512F-NEXT: retl
265260;
266261; X86-AVX512BW-LABEL: combine_vpermt2var_8i64_identity_mask:
@@ -269,26 +264,23 @@ define <8 x i64> @combine_vpermt2var_8i64_identity_mask(<8 x i64> %x0, <8 x i64>
269264; X86-AVX512BW-NEXT: movzbl {{[0-9]+}}(%esp), %eax
270265; X86-AVX512BW-NEXT: kmovd %eax, %k1
271266; X86-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
272- ; X86-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
273- ; X86-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
267+ ; X86-AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
274268; X86-AVX512BW-NEXT: retl
275269;
276270; X64-AVX512F-LABEL: combine_vpermt2var_8i64_identity_mask:
277271; X64-AVX512F: # %bb.0:
278272; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
279273; X64-AVX512F-NEXT: kmovw %edi, %k1
280274; X64-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
281- ; X64-AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
282- ; X64-AVX512F-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
275+ ; X64-AVX512F-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
283276; X64-AVX512F-NEXT: retq
284277;
285278; X64-AVX512BW-LABEL: combine_vpermt2var_8i64_identity_mask:
286279; X64-AVX512BW: # %bb.0:
287280; X64-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,6,5,4,3,2,1,0]
288281; X64-AVX512BW-NEXT: kmovd %edi, %k1
289282; X64-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
290- ; X64-AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm1 = [7,14,5,12,3,10,1,8]
291- ; X64-AVX512BW-NEXT: vpermt2q %zmm0, %zmm1, %zmm0 {%k1} {z}
283+ ; X64-AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0 {%k1} {z}
292284; X64-AVX512BW-NEXT: retq
293285 %res0 = call <8 x i64 > @llvm.x86.avx512.maskz.vpermt2var.q.512 (<8 x i64 > <i64 7 , i64 6 , i64 5 , i64 4 , i64 3 , i64 2 , i64 1 , i64 0 >, <8 x i64 > %x0 , <8 x i64 > %x1 , i8 %m )
294286 %res1 = call <8 x i64 > @llvm.x86.avx512.maskz.vpermt2var.q.512 (<8 x i64 > <i64 7 , i64 14 , i64 5 , i64 12 , i64 3 , i64 10 , i64 1 , i64 8 >, <8 x i64 > %res0 , <8 x i64 > %res0 , i8 %m )
@@ -309,26 +301,23 @@ define <16 x float> @combine_vpermt2var_16f32_identity_mask(<16 x float> %x0, <1
309301; X86-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
310302; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
311303; X86-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
312- ; X86-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
313- ; X86-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
304+ ; X86-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
314305; X86-NEXT: retl
315306;
316307; X64-AVX512F-LABEL: combine_vpermt2var_16f32_identity_mask:
317308; X64-AVX512F: # %bb.0:
318309; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
319310; X64-AVX512F-NEXT: kmovw %edi, %k1
320311; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
321- ; X64-AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
322- ; X64-AVX512F-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
312+ ; X64-AVX512F-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
323313; X64-AVX512F-NEXT: retq
324314;
325315; X64-AVX512BW-LABEL: combine_vpermt2var_16f32_identity_mask:
326316; X64-AVX512BW: # %bb.0:
327317; X64-AVX512BW-NEXT: vmovaps {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
328318; X64-AVX512BW-NEXT: kmovd %edi, %k1
329319; X64-AVX512BW-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
330- ; X64-AVX512BW-NEXT: vmovaps {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
331- ; X64-AVX512BW-NEXT: vpermt2ps %zmm0, %zmm1, %zmm0 {%k1} {z}
320+ ; X64-AVX512BW-NEXT: vpermps %zmm0, %zmm1, %zmm0 {%k1} {z}
332321; X64-AVX512BW-NEXT: retq
333322 %res0 = call <16 x float > @llvm.x86.avx512.maskz.vpermt2var.ps.512 (<16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >, <16 x float > %x0 , <16 x float > %x1 , i16 %m )
334323 %res1 = call <16 x float > @llvm.x86.avx512.maskz.vpermt2var.ps.512 (<16 x i32 > <i32 15 , i32 30 , i32 13 , i32 28 , i32 11 , i32 26 , i32 9 , i32 24 , i32 7 , i32 22 , i32 5 , i32 20 , i32 3 , i32 18 , i32 1 , i32 16 >, <16 x float > %res0 , <16 x float > %res0 , i16 %m )
@@ -598,26 +587,23 @@ define <16 x i32> @combine_vpermt2var_16i32_identity_mask(<16 x i32> %x0, <16 x
598587; X86-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
599588; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1
600589; X86-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
601- ; X86-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
602- ; X86-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
590+ ; X86-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
603591; X86-NEXT: retl
604592;
605593; X64-AVX512F-LABEL: combine_vpermt2var_16i32_identity_mask:
606594; X64-AVX512F: # %bb.0:
607595; X64-AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
608596; X64-AVX512F-NEXT: kmovw %edi, %k1
609597; X64-AVX512F-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
610- ; X64-AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
611- ; X64-AVX512F-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
598+ ; X64-AVX512F-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
612599; X64-AVX512F-NEXT: retq
613600;
614601; X64-AVX512BW-LABEL: combine_vpermt2var_16i32_identity_mask:
615602; X64-AVX512BW: # %bb.0:
616603; X64-AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0]
617604; X64-AVX512BW-NEXT: kmovd %edi, %k1
618605; X64-AVX512BW-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
619- ; X64-AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm1 = [15,30,13,28,11,26,9,24,7,22,5,20,3,18,1,16]
620- ; X64-AVX512BW-NEXT: vpermt2d %zmm0, %zmm1, %zmm0 {%k1} {z}
606+ ; X64-AVX512BW-NEXT: vpermd %zmm0, %zmm1, %zmm0 {%k1} {z}
621607; X64-AVX512BW-NEXT: retq
622608 %res0 = call <16 x i32 > @llvm.x86.avx512.maskz.vpermt2var.d.512 (<16 x i32 > <i32 15 , i32 14 , i32 13 , i32 12 , i32 11 , i32 10 , i32 9 , i32 8 , i32 7 , i32 6 , i32 5 , i32 4 , i32 3 , i32 2 , i32 1 , i32 0 >, <16 x i32 > %x0 , <16 x i32 > %x1 , i16 %m )
623609 %res1 = call <16 x i32 > @llvm.x86.avx512.maskz.vpermt2var.d.512 (<16 x i32 > <i32 15 , i32 30 , i32 13 , i32 28 , i32 11 , i32 26 , i32 9 , i32 24 , i32 7 , i32 22 , i32 5 , i32 20 , i32 3 , i32 18 , i32 1 , i32 16 >, <16 x i32 > %res0 , <16 x i32 > %res0 , i16 %m )
0 commit comments