tree-optimization/98550 - fix BB vect unrolling check
This fixes the check that disqualifies BB vectorization because of required unrolling to match up with the later exact_div we do. To not disable the ability to split groups that do not match up exactly with a chosen vector type this also introduces a soft-fail mechanism to vect_build_slp_tree_1 which delays failing until after the matches[] array is populated from other checks and only then determines the split point according to the vector type. 2021-01-12 Richard Biener <rguenther@suse.de> PR tree-optimization/98550 * tree-vect-slp.c (vect_record_max_nunits): Check whether the group size is a multiple of the vector element count. (vect_build_slp_tree_1): When we need to fail because the vector type chosen causes unrolling do so lazily without affecting matches only at the end to guide group splitting. * g++.dg/opt/pr98550.C: New testcase.
This commit is contained in:
parent
e91910d357
commit
52a170b1a1
2 changed files with 128 additions and 8 deletions
96
gcc/testsuite/g++.dg/opt/pr98550.C
Normal file
96
gcc/testsuite/g++.dg/opt/pr98550.C
Normal file
|
@ -0,0 +1,96 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target c++11 } */
|
||||
/* { dg-additional-options "-O3" } */
|
||||
/* { dg-additional-options "-march=z13" { target s390x-*-* } } */
|
||||
|
||||
// Wraps the integer template argument a and exposes it as the static constant c.
template <int a> struct k { static constexpr int c = a; };
|
||||
// Primary template of o: declared only, parameterized on two bool flags and a
// pack of types.
template <bool, bool, typename...> struct o;
|
||||
// Partial specialization for both flags false and at least one type in the
// pack.  Its member typedef h names the type of the literal 0.
template <typename f, typename... g> struct o<false, false, f, g...> {
|
||||
typedef decltype(0) h;
|
||||
};
|
||||
// p derives from o, passing k<false>::c for both bool arguments and forwarding
// its own type pack g.
template <typename... g> struct p : o<k<false>::c, k<false>::c, g...> {};
|
||||
class q;
|
||||
// r only declares the public member function ap, which takes a q by value.
// ap is declared here but not defined in this block.
class r {
|
||||
public:
|
||||
void ap(q);
|
||||
};
|
||||
// Calls ay with no arguments.  The first (unnamed) template parameter is not
// used in the body.
template <typename, typename aw> void ax(aw ay) { ay(); }
|
||||
// Looks up the member type h of p<az> and calls ax with that type as the
// explicit first template argument, passing bb as the callable.
template <typename az> void ba(az bb) {
|
||||
using bc = p<az>;
|
||||
using bd = typename bc::h;
|
||||
ax<bd>(bb);
|
||||
}
|
||||
template <unsigned> class s;
|
||||
// t declares br, which returns an s<8>, and defines a call operator template
// that hands the result of br to ba.
class t {
|
||||
public:
|
||||
s<8> br();
|
||||
template <typename...> void operator()() { ba(br()); }
|
||||
};
|
||||
// q is constructible from a value of any type.  The constructor ignores its
// argument and creates a temporary of the nested type H.
class q {
|
||||
public:
|
||||
template <typename az> q(az) { H(); }
|
||||
// H holds a t member and invokes that member's call operator on construction.
struct H {
|
||||
t cc;
|
||||
H() { cc(); }
|
||||
};
|
||||
};
|
||||
// Empty tag type distinguished only by its unsigned long template argument.
template <unsigned long> struct I {};
|
||||
// Recursive overload of cm: first calls cm again with the tag I<cl - 1>, then
// calls cn with cl, so the deeper (smaller) values are visited before cl.
template <unsigned long cl, typename j> void cm(j cn, I<cl>) {
|
||||
cm(cn, I<cl - 1>());
|
||||
cn(cl);
|
||||
}
|
||||
// Overload of cm for the tag I<0>: empty body, neither argument is used.
template <typename j> void cm(j, I<0>) {}
|
||||
// u holds an array cp of co longs and declares the member functions cq, cs
// and operator<.  None of them is defined inside the struct body.
template <unsigned co> struct u {
|
||||
long cp[co];
|
||||
void cq(const u &);
|
||||
void cs(int);
|
||||
void operator<(u);
|
||||
};
|
||||
// ANDs l.cp[i] into cp[i] for every index i that cm passes to the lambda.
// cm is started with the tag I<co>; following the recursive cm overload in
// this file, the lambda is then called with i running from 1 up to co.
template <unsigned co> void u<co>::cq(const u &l) {
|
||||
cm([&](int i) { cp[i] &= l.cp[i]; }, I<co>());
|
||||
}
|
||||
// Shifts cp[i] right by m for every index i that cm passes to the lambda.
// cm is started with the tag I<co - 2>, so it covers a shorter index range
// than cq does with I<co>.
template <unsigned co> void u<co>::cs(int m) {
|
||||
cm([&](int i) { cp[i] >>= m; }, I<co - 2>());
|
||||
}
|
||||
template <unsigned> class K;
|
||||
// v has a private int member cv, makes K<co> a friend (K<co>::cx reads cv),
// and declares the public member function cx.
template <unsigned co> class v {
|
||||
int cv;
|
||||
friend K<co>;
|
||||
|
||||
public:
|
||||
void cx(int, unsigned char *, unsigned long long);
|
||||
};
|
||||
// K declares a single public static member function cx that takes a v<co> by
// reference.
template <unsigned co> class K {
|
||||
public:
|
||||
static void cx(v<co> &);
|
||||
};
|
||||
// Out-of-class definition of v<co>::cx: the three parameters are unnamed and
// unused; the call is forwarded to the static K<co>::cx with *this.
template <unsigned co>
|
||||
void v<co>::cx(int, unsigned char *, unsigned long long) {
|
||||
K<co>::cx(*this);
|
||||
}
|
||||
// Definition of K<co>::cx.  It works on three default-initialized u<co>
// locals: a loop conditionally calls a.cs, then a.cq(d) and a < b follow.
// NOTE(review): e is read without being initialized and n is never used;
// presumably artifacts of testcase reduction, so do not tidy them up.
template <unsigned co> void K<co>::cx(v<co> &cz) {
|
||||
u<co> a, b, d;
|
||||
int e, n = cz.cv;
|
||||
// The loop body is only the if statement below; e is never modified here.
for (; e;)
|
||||
if (cz.cv)
|
||||
a.cs(cz.cv);
|
||||
// These two statements run after the loop, not inside it.
a.cq(d);
|
||||
a < b;
|
||||
}
|
||||
// s holds a private pointer dh to a v<co> and declares a public call operator
// taking no arguments.
template <unsigned co> class s {
|
||||
v<co> *dh;
|
||||
|
||||
public:
|
||||
void operator()();
|
||||
};
|
||||
// Definition of the call operator of s<co>: declares the locals f, g and h
// without initializers and passes f, the address of g, and h to dh->cx.
template <unsigned co> void s<co>::operator()() {
|
||||
int f;
|
||||
unsigned char g;
|
||||
long h;
|
||||
dh->cx(f, &g, h);
|
||||
}
|
||||
// Non-template entry point of the testcase.  r::ap takes a q, so passing the
// t object j goes through q's converting constructor template.
void d() {
|
||||
r i;
|
||||
t j;
|
||||
i.ap(j);
|
||||
}
|
|
@ -873,11 +873,8 @@ vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
|
|||
|
||||
/* If populating the vector type requires unrolling then fail
|
||||
before adjusting *max_nunits for basic-block vectorization. */
|
||||
poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
unsigned HOST_WIDE_INT const_nunits;
|
||||
if (is_a <bb_vec_info> (vinfo)
|
||||
&& (!nunits.is_constant (&const_nunits)
|
||||
|| const_nunits > group_size))
|
||||
&& !multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype)))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
|
@ -928,6 +925,8 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|
|||
stmt_vec_info first_load = NULL, prev_first_load = NULL;
|
||||
bool first_stmt_load_p = false, load_p = false;
|
||||
bool first_stmt_phi_p = false, phi_p = false;
|
||||
bool maybe_soft_fail = false;
|
||||
tree soft_fail_nunits_vectype = NULL_TREE;
|
||||
|
||||
/* For every stmt in NODE find its def stmt/s. */
|
||||
stmt_vec_info stmt_info;
|
||||
|
@ -977,10 +976,7 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|
|||
|
||||
tree nunits_vectype;
|
||||
if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype,
|
||||
&nunits_vectype, group_size)
|
||||
|| (nunits_vectype
|
||||
&& !vect_record_max_nunits (vinfo, stmt_info, group_size,
|
||||
nunits_vectype, max_nunits)))
|
||||
&nunits_vectype, group_size))
|
||||
{
|
||||
if (is_a <bb_vec_info> (vinfo) && i != 0)
|
||||
continue;
|
||||
|
@ -988,6 +984,17 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|
|||
matches[0] = false;
|
||||
return false;
|
||||
}
|
||||
/* Record nunits required but continue analysis, producing matches[]
|
||||
as if nunits was not an issue. This allows splitting of groups
|
||||
to happen. */
|
||||
if (nunits_vectype
|
||||
&& !vect_record_max_nunits (vinfo, stmt_info, group_size,
|
||||
nunits_vectype, max_nunits))
|
||||
{
|
||||
gcc_assert (is_a <bb_vec_info> (vinfo));
|
||||
maybe_soft_fail = true;
|
||||
soft_fail_nunits_vectype = nunits_vectype;
|
||||
}
|
||||
|
||||
gcc_assert (vectype);
|
||||
|
||||
|
@ -1340,6 +1347,23 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
|
|||
*two_operators = true;
|
||||
}
|
||||
|
||||
if (maybe_soft_fail)
|
||||
{
|
||||
unsigned HOST_WIDE_INT const_nunits;
|
||||
if (!TYPE_VECTOR_SUBPARTS
|
||||
(soft_fail_nunits_vectype).is_constant (&const_nunits)
|
||||
|| const_nunits > group_size)
|
||||
matches[0] = false;
|
||||
else
|
||||
{
|
||||
/* With constant vector elements simulate a mismatch at the
|
||||
point we need to split. */
|
||||
unsigned tail = group_size & (const_nunits - 1);
|
||||
memset (&matches[group_size - tail], 0, sizeof (bool) * tail);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue