@@ -4895,11 +4895,10 @@ define void @vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2(ptr %i
48954895;
48964896; AVX512BW-LABEL: vec512_i8_widen_to_i256_factor32_broadcast_to_v2i256_factor2:
48974897; AVX512BW: # %bb.0:
4898- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
4898+ ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,0,2,0,8,0,6,0]
48994899; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
4900- ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,0,10,0,0,0,14,0]
4901- ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
4902- ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm2, %zmm0
4900+ ; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
4901+ ; AVX512BW-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
49034902; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
49044903; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
49054904; AVX512BW-NEXT: vzeroupper
@@ -4997,11 +4996,10 @@ define void @vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16(ptr %i
49974996;
49984997; AVX512BW-LABEL: vec512_i16_widen_to_i32_factor2_broadcast_to_v16i32_factor16:
49994998; AVX512BW: # %bb.0:
5000- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
4999+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm0 = [32,1,32,3,32,5,32,7,32,9,32,11,32,13,32,15,32,17,32,19,32,21,32,23,32,25,32,27,32,29,32,31]
50015000; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5002- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} zmm2 = [0,33,0,35,0,37,0,39,0,41,0,43,0,45,0,47,0,49,0,51,0,53,0,55,0,57,0,59,0,61,0,63]
5003- ; AVX512BW-NEXT: vpermi2w %zmm1, %zmm0, %zmm2
5004- ; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
5001+ ; AVX512BW-NEXT: vpermt2w (%rdi), %zmm0, %zmm1
5002+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
50055003; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
50065004; AVX512BW-NEXT: vzeroupper
50075005; AVX512BW-NEXT: retq
@@ -5411,39 +5409,36 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
54115409;
54125410; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
54135411; AVX512F: # %bb.0:
5414- ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
5412+ ; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
54155413; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
5416- ; AVX512F-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
5417- ; AVX512F-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
5418- ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5414+ ; AVX512F-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
5415+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
54195416; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5420- ; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5417+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
54215418; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
54225419; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
54235420; AVX512F-NEXT: vzeroupper
54245421; AVX512F-NEXT: retq
54255422;
54265423; AVX512DQ-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
54275424; AVX512DQ: # %bb.0:
5428- ; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
5425+ ; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
54295426; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
5430- ; AVX512DQ-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
5431- ; AVX512DQ-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
5432- ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5427+ ; AVX512DQ-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
5428+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
54335429; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5434- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5430+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
54355431; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
54365432; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
54375433; AVX512DQ-NEXT: vzeroupper
54385434; AVX512DQ-NEXT: retq
54395435;
54405436; AVX512BW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
54415437; AVX512BW: # %bb.0:
5442- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5438+ ; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm0 = [16,1,16,3,16,5,16,7,16,9,16,11,16,13,16,15]
54435439; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5444- ; AVX512BW-NEXT: vpmovsxbd {{.*#+}} zmm2 = [0,17,0,19,0,21,0,23,0,25,0,27,0,29,0,31]
5445- ; AVX512BW-NEXT: vpermi2d %zmm1, %zmm0, %zmm2
5446- ; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
5440+ ; AVX512BW-NEXT: vpermt2d (%rdi), %zmm0, %zmm1
5441+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
54475442; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
54485443; AVX512BW-NEXT: vzeroupper
54495444; AVX512BW-NEXT: retq
@@ -5679,39 +5674,36 @@ define void @vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4(ptr %i
56795674;
56805675; AVX512F-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
56815676; AVX512F: # %bb.0:
5682- ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
5677+ ; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
56835678; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
5684- ; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
5685- ; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5686- ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5679+ ; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5680+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
56875681; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5688- ; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5682+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
56895683; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
56905684; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
56915685; AVX512F-NEXT: vzeroupper
56925686; AVX512F-NEXT: retq
56935687;
56945688; AVX512DQ-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
56955689; AVX512DQ: # %bb.0:
5696- ; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
5690+ ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
56975691; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
5698- ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
5699- ; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5700- ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5692+ ; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5693+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
57015694; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5702- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5695+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
57035696; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
57045697; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
57055698; AVX512DQ-NEXT: vzeroupper
57065699; AVX512DQ-NEXT: retq
57075700;
57085701; AVX512BW-LABEL: vec512_i64_widen_to_i128_factor2_broadcast_to_v4i128_factor4:
57095702; AVX512BW: # %bb.0:
5710- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5703+ ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,1,8,3,8,5,8,7]
57115704; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5712- ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,9,0,11,0,13,0,15]
5713- ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5714- ; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
5705+ ; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5706+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
57155707; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
57165708; AVX512BW-NEXT: vzeroupper
57175709; AVX512BW-NEXT: retq
@@ -5938,39 +5930,36 @@ define void @vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2(ptr %
59385930;
59395931; AVX512F-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
59405932; AVX512F: # %bb.0:
5941- ; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm0
5933+ ; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
59425934; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
5943- ; AVX512F-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
5944- ; AVX512F-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5945- ; AVX512F-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5935+ ; AVX512F-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5936+ ; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm0
59465937; AVX512F-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5947- ; AVX512F-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5938+ ; AVX512F-NEXT: vpaddb (%rsi), %ymm1, %ymm1
59485939; AVX512F-NEXT: vmovdqa %ymm1, (%rdx)
59495940; AVX512F-NEXT: vmovdqa %ymm0, 32(%rdx)
59505941; AVX512F-NEXT: vzeroupper
59515942; AVX512F-NEXT: retq
59525943;
59535944; AVX512DQ-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
59545945; AVX512DQ: # %bb.0:
5955- ; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
5946+ ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
59565947; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
5957- ; AVX512DQ-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
5958- ; AVX512DQ-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5959- ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm0
5948+ ; AVX512DQ-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5949+ ; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm0
59605950; AVX512DQ-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
5961- ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm2, %ymm1
5951+ ; AVX512DQ-NEXT: vpaddb (%rsi), %ymm1, %ymm1
59625952; AVX512DQ-NEXT: vmovdqa %ymm1, (%rdx)
59635953; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rdx)
59645954; AVX512DQ-NEXT: vzeroupper
59655955; AVX512DQ-NEXT: retq
59665956;
59675957; AVX512BW-LABEL: vec512_i128_widen_to_i256_factor2_broadcast_to_v2i256_factor2:
59685958; AVX512BW: # %bb.0:
5969- ; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
5959+ ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm0 = [8,9,2,3,8,9,6,7]
59705960; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5971- ; AVX512BW-NEXT: vpmovsxbq {{.*#+}} zmm2 = [0,1,10,11,0,1,14,15]
5972- ; AVX512BW-NEXT: vpermi2q %zmm1, %zmm0, %zmm2
5973- ; AVX512BW-NEXT: vpaddb (%rsi), %zmm2, %zmm0
5961+ ; AVX512BW-NEXT: vpermt2q (%rdi), %zmm0, %zmm1
5962+ ; AVX512BW-NEXT: vpaddb (%rsi), %zmm1, %zmm0
59745963; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rdx)
59755964; AVX512BW-NEXT: vzeroupper
59765965; AVX512BW-NEXT: retq
0 commit comments