loop-split improvements, part 3
extend tree-ssa-loop-split to understand test of the form if (i==0) and if (i!=0) which triggers only during the first iteration. Naturally we should also be able to trigger last iteration or split into 3 cases if the test indeed can fire in the middle of the loop. Last iteration is bit trickier pattern matching so I want to do it incrementally, but I implemented easy case using value range that handled loops with constant iterations. The testcase gets misupdated profile, I will also fix that incrementally. gcc/ChangeLog: PR middle-end/77689 * tree-ssa-loop-split.cc: Include value-query.h. (split_at_bb_p): Analyze cases where EQ/NE can be turned into LT/LE/GT/GE; return updated guard code. (split_loop): Use guard code. gcc/testsuite/ChangeLog: PR middle-end/77689 * g++.dg/tree-ssa/loop-split-1.C: New test.
This commit is contained in:
parent
095eb138f7
commit
f5fb9ff239
2 changed files with 80 additions and 17 deletions
23
gcc/testsuite/g++.dg/tree-ssa/loop-split-1.C
Normal file
23
gcc/testsuite/g++.dg/tree-ssa/loop-split-1.C
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O3 -fdump-tree-lsplit-details -std=c++11" } */
|
||||
#include <vector>
|
||||
#include <cmath>
|
||||
|
||||
constexpr unsigned s = 100000000;
|
||||
|
||||
int main()
|
||||
{
|
||||
std::vector<float> a, b, c;
|
||||
a.reserve(s);
|
||||
b.reserve(s);
|
||||
c.reserve(s);
|
||||
|
||||
for(unsigned i = 0; i < s; ++i)
|
||||
{
|
||||
if(i == 0)
|
||||
a[i] = b[i] * c[i];
|
||||
else
|
||||
a[i] = (b[i] + c[i]) * c[i-1] * std::log(i);
|
||||
}
|
||||
}
|
||||
/* { dg-final { scan-tree-dump-times "loop split" 1 "lsplit" } } */
|
|
@ -42,6 +42,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "gimple-fold.h"
|
||||
#include "gimplify-me.h"
|
||||
#include "print-tree.h"
|
||||
#include "value-query.h"
|
||||
|
||||
/* This file implements two kinds of loop splitting.
|
||||
|
||||
|
@ -75,7 +76,8 @@ along with GCC; see the file COPYING3. If not see
|
|||
point in *BORDER and the comparison induction variable in IV. */
|
||||
|
||||
static tree
|
||||
split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
|
||||
split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv,
|
||||
enum tree_code *guard_code)
|
||||
{
|
||||
gcond *stmt;
|
||||
affine_iv iv2;
|
||||
|
@ -87,19 +89,6 @@ split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
|
|||
|
||||
enum tree_code code = gimple_cond_code (stmt);
|
||||
|
||||
/* Only handle relational comparisons, for equality and non-equality
|
||||
we'd have to split the loop into two loops and a middle statement. */
|
||||
switch (code)
|
||||
{
|
||||
case LT_EXPR:
|
||||
case LE_EXPR:
|
||||
case GT_EXPR:
|
||||
case GE_EXPR:
|
||||
break;
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
if (loop_exits_from_bb_p (loop, bb))
|
||||
return NULL_TREE;
|
||||
|
||||
|
@ -129,6 +118,56 @@ split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
|
|||
if (!iv->no_overflow)
|
||||
return NULL_TREE;
|
||||
|
||||
/* Only handle relational comparisons, for equality and non-equality
|
||||
we'd have to split the loop into two loops and a middle statement. */
|
||||
switch (code)
|
||||
{
|
||||
case LT_EXPR:
|
||||
case LE_EXPR:
|
||||
case GT_EXPR:
|
||||
case GE_EXPR:
|
||||
break;
|
||||
case NE_EXPR:
|
||||
case EQ_EXPR:
|
||||
/* If the test check for first iteration, we can handle NE/EQ
|
||||
with only one split loop. */
|
||||
if (operand_equal_p (iv->base, iv2.base, 0))
|
||||
{
|
||||
if (code == EQ_EXPR)
|
||||
code = !tree_int_cst_sign_bit (iv->step) ? LE_EXPR : GE_EXPR;
|
||||
else
|
||||
code = !tree_int_cst_sign_bit (iv->step) ? GT_EXPR : LT_EXPR;
|
||||
break;
|
||||
}
|
||||
/* Similarly when the test checks for minimal or maximal
|
||||
value range. */
|
||||
else
|
||||
{
|
||||
int_range<2> r;
|
||||
get_global_range_query ()->range_of_expr (r, op0, stmt);
|
||||
if (!r.varying_p () && !r.undefined_p ()
|
||||
&& TREE_CODE (op1) == INTEGER_CST)
|
||||
{
|
||||
wide_int val = wi::to_wide (op1);
|
||||
if (known_eq (val, r.lower_bound ()))
|
||||
{
|
||||
code = (code == EQ_EXPR) ? LE_EXPR : GT_EXPR;
|
||||
break;
|
||||
}
|
||||
else if (known_eq (val, r.upper_bound ()))
|
||||
{
|
||||
code = (code == EQ_EXPR) ? GE_EXPR : LT_EXPR;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* TODO: We can compare with exit condition; it seems that testing for
|
||||
last iteration is common case. */
|
||||
return NULL_TREE;
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Found potential split point: ");
|
||||
|
@ -143,6 +182,7 @@ split_at_bb_p (class loop *loop, basic_block bb, tree *border, affine_iv *iv)
|
|||
}
|
||||
|
||||
*border = iv2.base;
|
||||
*guard_code = code;
|
||||
return op0;
|
||||
}
|
||||
|
||||
|
@ -551,7 +591,7 @@ split_loop (class loop *loop1)
|
|||
{
|
||||
if (!niter.control.no_overflow)
|
||||
return false;
|
||||
if (tree_int_cst_sign_bit (niter.control.step) > 0)
|
||||
if (tree_int_cst_sign_bit (niter.control.step))
|
||||
niter.cmp = GT_EXPR;
|
||||
else
|
||||
niter.cmp = LT_EXPR;
|
||||
|
@ -566,8 +606,9 @@ split_loop (class loop *loop1)
|
|||
}
|
||||
|
||||
/* Find a splitting opportunity. */
|
||||
enum tree_code guard_code;
|
||||
for (i = 0; i < loop1->num_nodes; i++)
|
||||
if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
|
||||
if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv, &guard_code)))
|
||||
{
|
||||
profile_count entry_count = loop_preheader_edge (loop1)->count ();
|
||||
/* Handling opposite steps is not implemented yet. Neither
|
||||
|
@ -585,7 +626,6 @@ split_loop (class loop *loop1)
|
|||
gcond *guard_stmt = as_a<gcond *> (*gsi_last_bb (bbs[i]));
|
||||
tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
|
||||
loop_preheader_edge (loop1));
|
||||
enum tree_code guard_code = gimple_cond_code (guard_stmt);
|
||||
|
||||
/* Loop splitting is implemented by versioning the loop, placing
|
||||
the new loop after the old loop, make the first loop iterate
|
||||
|
|
Loading…
Add table
Reference in a new issue