Skip to content

Commit 470411f

Browse files
author
Artemiy Volkov
committed
forwprop: allow subvectors in simplify_vector_constructor ()
This is an attempt to fix
https://gcc.gnu.org/pipermail/gcc-patches/2025-October/697879.html
in the middle-end; the motivation in that patch was to teach gcc to compile:

    int16x8_t foo (int16x8_t x)
    {
      return vcombine_s16 (vget_high_s16 (x), vget_low_s16 (x));
    }

into one instruction:

    foo:
        ext     v0.16b, v0.16b, v0.16b, #8
        ret

rather than the two we are generating now:

    foo:
        dup     d31, v0.d[1]
        uzp1    v0.2d, v31.2d, v0.2d
        ret

Instead of adding a define_insn in the backend, this patch relaxes the precondition of tree-ssa-forwprop.cc:simplify_vector_constructor () to accept subvectors as constructor elements. During initial argument processing (ll. 3817-3916), subvectors are decomposed into individual elements before populating the ELTS array; this allows the rest of the function to remain unchanged. Special handling is also implemented for constant and splat subvector elements of a constructor (the latter with the use of ssa_uniform_vector_p () from tree-vect-generic.cc, which this patch moves to tree.cc).

Add GIMPLE tests to gcc.dg/tree-ssa demonstrating the intended behavior with various combinations of subvectors as constructor arguments, including constant and splat subvectors; also add some aarch64-specific tests to show that the change leads to us picking the "ext" instruction for the resulting VEC_PERM_EXPR.

Bootstrapped and regtested on aarch64 and x86_64, regtested on aarch64_be.

gcc/ChangeLog:

    * tree-ssa-forwprop.cc (simplify_vector_constructor): Support
    vector constructor elements.
    * tree-vect-generic.cc (ssa_uniform_vector_p): Make non-static and
    move ...
    * tree.cc (ssa_uniform_vector_p): ... here.
    * tree.h (ssa_uniform_vector_p): Declare it.

gcc/testsuite/ChangeLog:

    * gcc.dg/tree-ssa/forwprop-43.c: New test.
    * gcc.target/aarch64/simd/combine_ext.c: New test.
1 parent 7c4f8ae commit 470411f

File tree

6 files changed

+279
-29
lines changed

6 files changed

+279
-29
lines changed
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-O2 -fdump-tree-forwprop1" } */
3+
/* { dg-additional-options "-fgimple" } */
4+
5+
#include <stdint.h>
6+
7+
typedef int32_t int32x4_t __attribute__((vector_size(16)));
8+
typedef int32_t int32x2_t __attribute__((vector_size(8)));
9+
typedef int32_t int32x1_t __attribute__((vector_size(4)));
10+
11+
/* Build a 4-element vector from two 2-element subvectors of X: the 64-bit
   chunk at bit offset 64 followed by the 64-bit chunk at bit offset 0.  */
int32x4_t __GIMPLE (ssa)
12+
foo (int32x4_t x)
13+
{
14+
int32x2_t _1;
15+
int32x2_t _2;
16+
int32x4_t _6;
17+
18+
__BB(2):
19+
_1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64);
20+
_2 = __BIT_FIELD_REF <int32x2_t> (x, 64, 0);
21+
_6 = _Literal (int32x4_t) { _1, _2 };
22+
return _6;
23+
}
24+
25+
/* Same source vector as foo, but assembled from four single-element
   subvectors of X taken at bit offsets 64, 96, 0 and 32, in that order.  */
int32x4_t __GIMPLE (ssa)
26+
foo2 (int32x4_t x)
27+
{
28+
int32x1_t _1;
29+
int32x1_t _2;
30+
int32x1_t _3;
31+
int32x1_t _4;
32+
int32x4_t _6;
33+
34+
__BB(2):
35+
_1 = __BIT_FIELD_REF <int32x1_t> (x, 32, 64);
36+
_2 = __BIT_FIELD_REF <int32x1_t> (x, 32, 96);
37+
_3 = __BIT_FIELD_REF <int32x1_t> (x, 32, 0);
38+
_4 = __BIT_FIELD_REF <int32x1_t> (x, 32, 32);
39+
_6 = _Literal (int32x4_t) { _1, _2, _3, _4 };
40+
return _6;
41+
}
42+
43+
/* Two-source case: the 64-bit chunk of X at bit offset 64 followed by the
   64-bit chunk of Y at bit offset 0.  */
int32x4_t __GIMPLE (ssa)
44+
foo3 (int32x4_t x, int32x4_t y)
45+
{
46+
int32x2_t _1;
47+
int32x2_t _2;
48+
int32x4_t _6;
49+
50+
__BB(2):
51+
_1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64);
52+
_2 = __BIT_FIELD_REF <int32x2_t> (y, 64, 0);
53+
_6 = _Literal (int32x4_t) { _1, _2 };
54+
return _6;
55+
}
56+
57+
/* Two-source case with single-element subvectors alternating between X and
   Y: X at bit offset 64, Y at 96, X at 0, Y at 32.  */
int32x4_t __GIMPLE (ssa)
58+
foo4 (int32x4_t x, int32x4_t y)
59+
{
60+
int32x1_t _1;
61+
int32x1_t _2;
62+
int32x1_t _3;
63+
int32x1_t _4;
64+
int32x4_t _6;
65+
66+
__BB(2):
67+
_1 = __BIT_FIELD_REF <int32x1_t> (x, 32, 64);
68+
_2 = __BIT_FIELD_REF <int32x1_t> (y, 32, 96);
69+
_3 = __BIT_FIELD_REF <int32x1_t> (x, 32, 0);
70+
_4 = __BIT_FIELD_REF <int32x1_t> (y, 32, 32);
71+
_6 = _Literal (int32x4_t) { _1, _2, _3, _4 };
72+
return _6;
73+
}
74+
75+
/* Subvector of X (64 bits at bit offset 64) followed by the constant
   subvector { 1, 2 }.  */
int32x4_t __GIMPLE (ssa)
76+
foo5 (int32x4_t x)
77+
{
78+
int32x2_t _1;
79+
int32x2_t _2;
80+
int32x4_t _6;
81+
82+
__BB(2):
83+
_1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64);
84+
_2 = _Literal (int32x2_t) { 1, 2 };
85+
_6 = _Literal (int32x4_t) { _1, _2 };
86+
return _6;
87+
}
88+
89+
/* Subvector of X (64 bits at bit offset 64) followed by a 2-element
   subvector whose elements are both the scalar Y (a splat).  */
int32x4_t __GIMPLE (ssa)
90+
foo6 (int32x4_t x, int32_t y)
91+
{
92+
int32x2_t _1;
93+
int32x2_t _2;
94+
int32x4_t _6;
95+
96+
__BB(2):
97+
_1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64);
98+
_2 = _Literal (int32x2_t) { y, y };
99+
_6 = _Literal (int32x4_t) { _1, _2 };
100+
return _6;
101+
}
102+
103+
/* Like foo5 with the operands swapped: the constant subvector { 1, 2 }
   comes first, then the 64-bit chunk of X at bit offset 64.  */
int32x4_t __GIMPLE (ssa)
104+
foo7 (int32x4_t x)
105+
{
106+
int32x2_t _1;
107+
int32x2_t _2;
108+
int32x4_t _6;
109+
110+
__BB(2):
111+
_1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64);
112+
_2 = _Literal (int32x2_t) { 1, 2 };
113+
_6 = _Literal (int32x4_t) { _2, _1 };
114+
return _6;
115+
}
116+
117+
/* Like foo6 with the operands swapped: the { y, y } splat subvector comes
   first, then the 64-bit chunk of X at bit offset 64.  */
int32x4_t __GIMPLE (ssa)
118+
foo8 (int32x4_t x, int32_t y)
119+
{
120+
int32x2_t _1;
121+
int32x2_t _2;
122+
int32x4_t _6;
123+
124+
__BB(2):
125+
_1 = __BIT_FIELD_REF <int32x2_t> (x, 64, 64);
126+
_2 = _Literal (int32x2_t) { y, y };
127+
_6 = _Literal (int32x4_t) { _2, _1 };
128+
return _6;
129+
}
130+
131+
/* Single-element subvectors only: two constant vectors { 1 } followed by
   the elements of X at bit offsets 96 and 64.  */
int32x4_t __GIMPLE (ssa)
132+
foo9 (int32x4_t x)
133+
{
134+
int32x1_t _1;
135+
int32x1_t _2;
136+
int32x1_t _3;
137+
int32x1_t _4;
138+
int32x4_t _6;
139+
140+
__BB(2):
141+
_1 = __BIT_FIELD_REF <int32x1_t> (x, 32, 96);
142+
_2 = __BIT_FIELD_REF <int32x1_t> (x, 32, 64);
143+
_3 = _Literal (int32x1_t) { 1 };
144+
_4 = _Literal (int32x1_t) { 1 };
145+
_6 = _Literal (int32x4_t) { _3, _4, _1, _2 };
146+
return _6;
147+
}
148+
149+
/* Single-element subvectors only: two vectors each holding the scalar Y,
   followed by the elements of X at bit offsets 96 and 64.  */
int32x4_t __GIMPLE (ssa)
150+
foo10 (int32x4_t x, int32_t y)
151+
{
152+
int32x1_t _1;
153+
int32x1_t _2;
154+
int32x1_t _3;
155+
int32x1_t _4;
156+
int32x4_t _6;
157+
158+
__BB(2):
159+
_1 = __BIT_FIELD_REF <int32x1_t> (x, 32, 96);
160+
_2 = __BIT_FIELD_REF <int32x1_t> (x, 32, 64);
161+
_3 = _Literal (int32x1_t) { y };
162+
_4 = _Literal (int32x1_t) { y };
163+
_6 = _Literal (int32x4_t) { _3, _4, _1, _2 };
164+
165+
return _6;
166+
}
167+
168+
169+
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 10 "forwprop1" } } */
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/* { dg-do compile } */
2+
/* { dg-options "-O1 -fdump-tree-optimized" } */
3+
4+
#include <arm_neon.h>
5+
6+
/* Define rev_<TYPE>_1, a single-input function that returns
   vcombine_<SUFF> of vget_high_<SUFF> (x) and vget_low_<SUFF> (x).
   The #ifndef guard lets an earlier definition of the macro take
   precedence.  */
#ifndef TEST_COMBINE_HIGH_LOW_1
7+
#define TEST_COMBINE_HIGH_LOW_1(TYPE, SUFF) \
8+
TYPE rev_##TYPE##_1 (TYPE x) \
9+
{ \
10+
return vcombine_##SUFF (vget_high_##SUFF (x), vget_low_##SUFF (x)); \
11+
}
12+
#endif
13+
14+
/* Define rev_<TYPE>_2, a two-input function that returns
   vcombine_<SUFF> of vget_high_<SUFF> (x) and vget_low_<SUFF> (y).
   The #ifndef guard lets an earlier definition of the macro take
   precedence.  */
#ifndef TEST_COMBINE_HIGH_LOW_2
15+
#define TEST_COMBINE_HIGH_LOW_2(TYPE, SUFF) \
16+
TYPE rev_##TYPE##_2 (TYPE x, TYPE y) \
17+
{ \
18+
return vcombine_##SUFF (vget_high_##SUFF (x), vget_low_##SUFF (y)); \
19+
}
20+
#endif
21+
22+
/* Instantiate the single-input variant for ten vector types.  */
TEST_COMBINE_HIGH_LOW_1 (int8x16_t, s8)
23+
TEST_COMBINE_HIGH_LOW_1 (int16x8_t, s16)
24+
TEST_COMBINE_HIGH_LOW_1 (int32x4_t, s32)
25+
TEST_COMBINE_HIGH_LOW_1 (int64x2_t, s64)
26+
TEST_COMBINE_HIGH_LOW_1 (uint8x16_t, u8)
27+
TEST_COMBINE_HIGH_LOW_1 (uint16x8_t, u16)
28+
TEST_COMBINE_HIGH_LOW_1 (uint32x4_t, u32)
29+
TEST_COMBINE_HIGH_LOW_1 (uint64x2_t, u64)
30+
TEST_COMBINE_HIGH_LOW_1 (float16x8_t, f16)
31+
TEST_COMBINE_HIGH_LOW_1 (float32x4_t, f32)
32+
33+
/* Instantiate the two-input variant for the same ten vector types.  */
TEST_COMBINE_HIGH_LOW_2 (int8x16_t, s8)
34+
TEST_COMBINE_HIGH_LOW_2 (int16x8_t, s16)
35+
TEST_COMBINE_HIGH_LOW_2 (int32x4_t, s32)
36+
TEST_COMBINE_HIGH_LOW_2 (int64x2_t, s64)
37+
TEST_COMBINE_HIGH_LOW_2 (uint8x16_t, u8)
38+
TEST_COMBINE_HIGH_LOW_2 (uint16x8_t, u16)
39+
TEST_COMBINE_HIGH_LOW_2 (uint32x4_t, u32)
40+
TEST_COMBINE_HIGH_LOW_2 (uint64x2_t, u64)
41+
TEST_COMBINE_HIGH_LOW_2 (float16x8_t, f16)
42+
TEST_COMBINE_HIGH_LOW_2 (float32x4_t, f32)
43+
44+
/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 20 "optimized" } } */
45+
/* { dg-final { scan-assembler-times {ext\tv0.16b, v0.16b, v0.16b, #8} 10 } } */
46+
/* { dg-final { scan-assembler-times {ext\tv0.16b, v0.16b, v1.16b, #8} 10 } } */

gcc/tree-ssa-forwprop.cc

Lines changed: 42 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3807,13 +3807,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
38073807
bool maybe_blend[2] = { true, true };
38083808
tree one_constant = NULL_TREE;
38093809
tree one_nonconstant = NULL_TREE;
3810+
tree subelt;
38103811
auto_vec<tree> constants;
38113812
constants.safe_grow_cleared (nelts, true);
38123813
auto_vec<std::pair<unsigned, unsigned>, 64> elts;
3814+
unsigned int tsubelts = 0;
38133815
FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
38143816
{
38153817
tree ref, op1;
3816-
unsigned int elem;
3818+
unsigned int elem, src_elem_size;
3819+
unsigned HOST_WIDE_INT nsubelts = 1;
38173820

38183821
if (i >= nelts)
38193822
return false;
@@ -3824,10 +3827,16 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
38243827
if (op1
38253828
&& TREE_CODE ((ref = TREE_OPERAND (op1, 0))) == SSA_NAME
38263829
&& VECTOR_TYPE_P (TREE_TYPE (ref))
3827-
&& useless_type_conversion_p (TREE_TYPE (op1),
3830+
&& (useless_type_conversion_p (TREE_TYPE (op1),
38283831
TREE_TYPE (TREE_TYPE (ref)))
3829-
&& constant_multiple_p (bit_field_offset (op1),
3830-
bit_field_size (op1), &elem)
3832+
|| (VECTOR_TYPE_P (TREE_TYPE (op1))
3833+
&& useless_type_conversion_p (TREE_TYPE (TREE_TYPE (op1)),
3834+
TREE_TYPE (TREE_TYPE (ref)))
3835+
&& TYPE_VECTOR_SUBPARTS (TREE_TYPE (op1))
3836+
.is_constant (&nsubelts)))
3837+
&& constant_multiple_p (bit_field_size (op1), nsubelts,
3838+
&src_elem_size)
3839+
&& constant_multiple_p (bit_field_offset (op1), src_elem_size, &elem)
38313840
&& TYPE_VECTOR_SUBPARTS (TREE_TYPE (ref)).is_constant (&refnelts))
38323841
{
38333842
unsigned int j;
@@ -3851,7 +3860,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
38513860
maybe_ident = false;
38523861
if (elem != i)
38533862
maybe_blend[j] = false;
3854-
elts.safe_push (std::make_pair (j, elem));
3863+
for (unsigned int k = 0; k < nsubelts; ++k)
3864+
elts.safe_push (std::make_pair (j, elem + k));
3865+
tsubelts += nsubelts;
38553866
continue;
38563867
}
38573868
/* Else fallthru. */
@@ -3863,27 +3874,47 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
38633874
&& orig[1] != error_mark_node)
38643875
return false;
38653876
orig[1] = error_mark_node;
3877+
if (VECTOR_TYPE_P (TREE_TYPE (elt->value))
3878+
&& !TYPE_VECTOR_SUBPARTS (TREE_TYPE (elt->value))
3879+
.is_constant (&nsubelts))
3880+
return false;
38663881
if (CONSTANT_CLASS_P (elt->value))
38673882
{
38683883
if (one_nonconstant)
38693884
return false;
38703885
if (!one_constant)
3871-
one_constant = elt->value;
3872-
constants[i] = elt->value;
3886+
one_constant = TREE_CODE (elt->value) == VECTOR_CST
3887+
? VECTOR_CST_ELT (elt->value, 0)
3888+
: elt->value;
3889+
if (TREE_CODE (elt->value) == VECTOR_CST)
3890+
{
3891+
for (unsigned int k = 0; k < nsubelts; k++)
3892+
constants[tsubelts + k] = VECTOR_CST_ELT (elt->value, k);
3893+
}
3894+
else
3895+
constants[tsubelts] = elt->value;
38733896
}
38743897
else
38753898
{
38763899
if (one_constant)
38773900
return false;
3901+
subelt = VECTOR_TYPE_P (TREE_TYPE (elt->value))
3902+
? ssa_uniform_vector_p (elt->value)
3903+
: elt->value;
3904+
if (!subelt)
3905+
return false;
38783906
if (!one_nonconstant)
3879-
one_nonconstant = elt->value;
3880-
else if (!operand_equal_p (one_nonconstant, elt->value, 0))
3907+
one_nonconstant = subelt;
3908+
else if (!operand_equal_p (one_nonconstant, subelt, 0))
38813909
return false;
38823910
}
3883-
elts.safe_push (std::make_pair (1, i));
3911+
for (unsigned int k = 0; k < nsubelts; ++k)
3912+
elts.safe_push (std::make_pair (1, tsubelts + k));
3913+
tsubelts += nsubelts;
38843914
maybe_ident = false;
38853915
}
3886-
if (i < nelts)
3916+
3917+
if (elts.length () < nelts)
38873918
return false;
38883919

38893920
if (! orig[0]

gcc/tree-vect-generic.cc

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,24 +1619,6 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
16191619
update_stmt (gsi_stmt (*gsi));
16201620
}
16211621

1622-
/* If OP is a uniform vector return the element it is a splat from. */
1623-
1624-
static tree
1625-
ssa_uniform_vector_p (tree op)
1626-
{
1627-
if (TREE_CODE (op) == VECTOR_CST
1628-
|| TREE_CODE (op) == VEC_DUPLICATE_EXPR
1629-
|| TREE_CODE (op) == CONSTRUCTOR)
1630-
return uniform_vector_p (op);
1631-
if (TREE_CODE (op) == SSA_NAME)
1632-
{
1633-
gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1634-
if (gimple_assign_single_p (def_stmt))
1635-
return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1636-
}
1637-
return NULL_TREE;
1638-
}
1639-
16401622
/* Return the type that should be used to implement OP on type TYPE.
16411623
This is TYPE itself if the target can do the operation directly,
16421624
otherwise it is a scalar type or a smaller vector type. */

gcc/tree.cc

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10823,6 +10823,24 @@ uniform_vector_p (const_tree vec)
1082310823
return NULL_TREE;
1082410824
}
1082510825

10826+
/* If OP is a uniform vector return the element it is a splat from.

   A VECTOR_CST, VEC_DUPLICATE_EXPR or CONSTRUCTOR is handed straight to
   uniform_vector_p.  For an SSA_NAME whose defining statement is a
   single-rhs assignment, the right-hand side of that assignment is handed
   to uniform_vector_p instead; only this one definition is inspected, the
   chain is not followed further.  Return NULL_TREE for any other OP.  */
10827+
10828+
tree
10829+
ssa_uniform_vector_p (tree op)
10830+
{
10831+
/* Tree codes that can be checked directly.  */
if (TREE_CODE (op) == VECTOR_CST
10832+
|| TREE_CODE (op) == VEC_DUPLICATE_EXPR
10833+
|| TREE_CODE (op) == CONSTRUCTOR)
10834+
return uniform_vector_p (op);
10835+
if (TREE_CODE (op) == SSA_NAME)
10836+
{
10837+
gimple *def_stmt = SSA_NAME_DEF_STMT (op);
10838+
/* Look through one single-rhs assignment and test what it copies.  */
if (gimple_assign_single_p (def_stmt))
10839+
return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
10840+
}
10841+
return NULL_TREE;
10842+
}
10843+
1082610844
/* If the argument is INTEGER_CST, return it. If the argument is vector
1082710845
with all elements the same INTEGER_CST, return that INTEGER_CST. Otherwise
1082810846
return NULL_TREE.

gcc/tree.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5303,6 +5303,10 @@ extern tree vector_cst_elt (const_tree, unsigned int);
53035303

53045304
extern tree uniform_vector_p (const_tree);
53055305

5306+
/* Same as uniform_vector_p, but if the argument is an SSA_NAME defined by a
   single-rhs assignment, inspect the right-hand side of that definition
   instead.  Return NULL_TREE if no splat element is found.  */
5307+
5308+
extern tree ssa_uniform_vector_p (tree);
5309+
53065310
/* If the argument is INTEGER_CST, return it. If the argument is vector
53075311
with all elements the same INTEGER_CST, return that INTEGER_CST. Otherwise
53085312
return NULL_TREE. */

0 commit comments

Comments
 (0)