re PR tree-optimization/18754 (unrolling happens too late/SRA does not happen late enough)
2008-04-27 Richard Guenther <rguenther@suse.de> PR tree-optimization/18754 PR tree-optimization/34223 * tree-pass.h (pass_complete_unrolli): Declare. * tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Print loop size before and after unconditionally of UL_NO_GROWTH in effect. Rewrite loop into loop closed SSA form if it is not already. (tree_unroll_loops_completely): Re-structure to iterate over innermost loops with intermediate CFG cleanups. Unroll outermost loops only if requested or the code does not grow doing so. * tree-ssa-loop.c (gate_tree_vectorize): Don't shortcut if no loops are available. (tree_vectorize): Instead do so here. (tree_complete_unroll): Also unroll outermost loops. (tree_complete_unroll_inner): New function. (gate_tree_complete_unroll_inner): Likewise. (pass_complete_unrolli): New pass. * tree-ssa-loop-manip.c (find_uses_to_rename_use): Only record uses outside of the loop. (tree_duplicate_loop_to_header_edge): Only verify loop-closed SSA form if it is available. * tree-flow.h (tree_unroll_loops_completely): Add extra parameter. * passes.c (init_optimization_passes): Schedule complete inner loop unrolling pass before the first CCP pass after final inlining. * gcc.dg/tree-ssa/loop-36.c: New testcase. * gcc.dg/tree-ssa/loop-37.c: Likewise. * gcc.dg/vect/vect-118.c: Likewise. * gcc.dg/Wunreachable-8.c: XFAIL bogus warning. * gcc.dg/vect/vect-66.c: Increase loop trip count. * gcc.dg/vect/no-section-anchors-vect-66.c: Likewise. * gcc.dg/vect/no-section-anchors-vect-69.c: Likewise. * gcc.dg/vect/vect-76.c: Likewise. * gcc.dg/vect/vect-outer-6.c: Likewise. * gcc.dg/vect/vect-outer-1.c: Likewise. * gcc.dg/vect/vect-outer-1a.c: Likewise. * gcc.dg/vect/vect-11a.c: Likewise. * gcc.dg/vect/vect-shift-1.c: Likewise. * gcc.target/i386/vectorize1.c: Likewise. From-SVN: r134730
This commit is contained in:
parent
feb60f0328
commit
d6e840eed2
22 changed files with 307 additions and 87 deletions
|
@ -1,3 +1,30 @@
|
|||
2008-04-27 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/18754
|
||||
PR tree-optimization/34223
|
||||
* tree-pass.h (pass_complete_unrolli): Declare.
|
||||
* tree-ssa-loop-ivcanon.c (try_unroll_loop_completely): Print
|
||||
loop size before and after unrolling, regardless of whether UL_NO_GROWTH is in effect.
|
||||
Rewrite loop into loop closed SSA form if it is not already.
|
||||
(tree_unroll_loops_completely): Re-structure to iterate over
|
||||
innermost loops with intermediate CFG cleanups.
|
||||
Unroll outermost loops only if requested or the code does not grow
|
||||
doing so.
|
||||
* tree-ssa-loop.c (gate_tree_vectorize): Don't shortcut if no
|
||||
loops are available.
|
||||
(tree_vectorize): Instead do so here.
|
||||
(tree_complete_unroll): Also unroll outermost loops.
|
||||
(tree_complete_unroll_inner): New function.
|
||||
(gate_tree_complete_unroll_inner): Likewise.
|
||||
(pass_complete_unrolli): New pass.
|
||||
* tree-ssa-loop-manip.c (find_uses_to_rename_use): Only record
|
||||
uses outside of the loop.
|
||||
(tree_duplicate_loop_to_header_edge): Only verify loop-closed SSA
|
||||
form if it is available.
|
||||
* tree-flow.h (tree_unroll_loops_completely): Add extra parameter.
|
||||
* passes.c (init_optimization_passes): Schedule complete inner
|
||||
loop unrolling pass before the first CCP pass after final inlining.
|
||||
|
||||
2008-04-27 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* targhooks.h (default_emutls_var_fields,
|
||||
|
|
|
@ -567,6 +567,7 @@ init_optimization_passes (void)
|
|||
NEXT_PASS (pass_rename_ssa_copies);
|
||||
|
||||
/* Initial scalar cleanups. */
|
||||
NEXT_PASS (pass_complete_unrolli);
|
||||
NEXT_PASS (pass_ccp);
|
||||
NEXT_PASS (pass_phiprop);
|
||||
NEXT_PASS (pass_fre);
|
||||
|
|
|
@ -1,3 +1,22 @@
|
|||
2008-04-27 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/18754
|
||||
PR tree-optimization/34223
|
||||
* gcc.dg/tree-ssa/loop-36.c: New testcase.
|
||||
* gcc.dg/tree-ssa/loop-37.c: Likewise.
|
||||
* gcc.dg/vect/vect-118.c: Likewise.
|
||||
* gcc.dg/Wunreachable-8.c: XFAIL bogus warning.
|
||||
* gcc.dg/vect/vect-66.c: Increase loop trip count.
|
||||
* gcc.dg/vect/no-section-anchors-vect-66.c: Likewise.
|
||||
* gcc.dg/vect/no-section-anchors-vect-69.c: Likewise.
|
||||
* gcc.dg/vect/vect-76.c: Likewise.
|
||||
* gcc.dg/vect/vect-outer-6.c: Likewise.
|
||||
* gcc.dg/vect/vect-outer-1.c: Likewise.
|
||||
* gcc.dg/vect/vect-outer-1a.c: Likewise.
|
||||
* gcc.dg/vect/vect-11a.c: Likewise.
|
||||
* gcc.dg/vect/vect-shift-1.c: Likewise.
|
||||
* gcc.target/i386/vectorize1.c: Likewise.
|
||||
|
||||
2008-04-27 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* gcc.dg/tls/section-2.c: New.
|
||||
|
|
|
@ -6,7 +6,7 @@ float Factorial(float X)
|
|||
int k,j;
|
||||
for (k=1; k < 5; k++)
|
||||
{
|
||||
val += 1.0;
|
||||
val += 1.0; /* { dg-bogus "will never be executed" "" { xfail *-*-* } } */
|
||||
}
|
||||
return (val); /* { dg-bogus "will never be executed" } */
|
||||
}
|
||||
|
|
21
gcc/testsuite/gcc.dg/tree-ssa/loop-36.c
Normal file
21
gcc/testsuite/gcc.dg/tree-ssa/loop-36.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -fdump-tree-dce2" } */
|
||||
|
||||
struct X { float array[4]; };
|
||||
|
||||
struct X a,b;
|
||||
|
||||
float foobar () {
|
||||
float s = 0;
|
||||
unsigned int d;
|
||||
struct X c;
|
||||
for (d=0; d<4; ++d)
|
||||
c.array[d] = a.array[d] * b.array[d];
|
||||
for (d=0; d<4; ++d)
|
||||
s+=c.array[d];
|
||||
return s;
|
||||
}
|
||||
|
||||
/* The temporary structure should have been promoted to registers
|
||||
by FRE after the loops have been unrolled by the early unrolling pass. */
|
||||
/* { dg-final { scan-tree-dump-not "c\.array" "dce2" } } */
|
27
gcc/testsuite/gcc.dg/tree-ssa/loop-37.c
Normal file
27
gcc/testsuite/gcc.dg/tree-ssa/loop-37.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do link } */
|
||||
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||
|
||||
extern void link_error (void);
|
||||
static const int my_array [3] = { 4, 5, 6 };
|
||||
|
||||
void f0 (void)
|
||||
{
|
||||
int j, sum = 0;
|
||||
for (j = 0; j < 3; j ++)
|
||||
sum += my_array [j];
|
||||
if (15 != sum)
|
||||
link_error ();
|
||||
}
|
||||
|
||||
int f1 (int a [])
|
||||
{
|
||||
int j, sum = 0;
|
||||
for (j = 0; j < 3; j ++)
|
||||
sum += a [j] + my_array [j];
|
||||
return sum;
|
||||
}
|
||||
|
||||
int main() { }
|
||||
|
||||
/* { dg-final { scan-tree-dump-not "my_array" "optimized" } } */
|
||||
/* { dg-final { cleanup-tree-dump "optimized" } } */
|
|
@ -3,11 +3,10 @@
|
|||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define N 8
|
||||
|
||||
int ib[6] = {0,3,6,9,12,15};
|
||||
int ia[8][5][6];
|
||||
int ic[16][16][5][6];
|
||||
int ia[8][5][N+2];
|
||||
int ic[16][16][5][N+2];
|
||||
|
||||
__attribute__ ((noinline))
|
||||
int main1 ()
|
||||
|
@ -17,7 +16,7 @@ int main1 ()
|
|||
/* Multidimensional array. Aligned. */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
ia[2][6][j] = 5;
|
||||
}
|
||||
|
@ -26,7 +25,7 @@ int main1 ()
|
|||
/* check results: */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (ia[2][6][j] != 5)
|
||||
abort();
|
||||
|
@ -35,14 +34,14 @@ int main1 ()
|
|||
/* Multidimensional array. Aligned. */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
ia[3][6][j+2] = 5;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 2; j < 6; j++)
|
||||
for (j = 2; j < N+2; j++)
|
||||
{
|
||||
if (ia[3][6][j] != 5)
|
||||
abort();
|
||||
|
@ -52,7 +51,7 @@ int main1 ()
|
|||
/* Multidimensional array. Not aligned. */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
ic[2][1][6][j+1] = 5;
|
||||
}
|
||||
|
@ -61,7 +60,7 @@ int main1 ()
|
|||
/* check results: */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (ic[2][1][6][j+1] != 5)
|
||||
abort();
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
#define N 12
|
||||
|
||||
struct s{
|
||||
int m;
|
||||
|
|
34
gcc/testsuite/gcc.dg/vect/vect-118.c
Normal file
34
gcc/testsuite/gcc.dg/vect/vect-118.c
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-options "-O3 -fdump-tree-vect-details" } */
|
||||
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define M 10
|
||||
#define N 3
|
||||
|
||||
void __attribute__((noinline))
|
||||
foo (int n, int *ub, int *uc)
|
||||
{
|
||||
int i, j, tmp1;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
tmp1 = 0;
|
||||
for (j = 0; j < M; j++)
|
||||
{
|
||||
tmp1 += uc[i] * ((int)(j << N) / M);
|
||||
}
|
||||
ub[i] = tmp1;
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int uc[16], ub[16];
|
||||
check_vect ();
|
||||
foo (16, uc, ub);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -9,15 +9,18 @@ extern void abort (void);
|
|||
__attribute__ ((noinline))
|
||||
void u ()
|
||||
{
|
||||
unsigned int A[4] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001};
|
||||
unsigned int B[4] = {0x08000000,0x08000001,0xff0000ff,0xf0000001};
|
||||
unsigned int Answer[4] = {0,0xf7ffffff,0x0200fe01,0xe0000001};
|
||||
unsigned int C[4];
|
||||
unsigned int A[8] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001,
|
||||
0x08000000,0xffffffff,0xff0000ff,0xf0000001};
|
||||
unsigned int B[8] = {0x08000000,0x08000001,0xff0000ff,0xf0000001,
|
||||
0x08000000,0x08000001,0xff0000ff,0xf0000001};
|
||||
unsigned int Answer[8] = {0,0xf7ffffff,0x0200fe01,0xe0000001,
|
||||
0,0xf7ffffff,0x0200fe01,0xe0000001};
|
||||
unsigned int C[8];
|
||||
int i, j;
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
for (i=0; i<8; i++)
|
||||
C[i] = A[i] * B[i];
|
||||
for (i=0; i<4; i++)
|
||||
for (i=0; i<8; i++)
|
||||
if (C[i] != Answer[i])
|
||||
abort ();
|
||||
}
|
||||
|
@ -25,15 +28,18 @@ void u ()
|
|||
__attribute__ ((noinline))
|
||||
void s()
|
||||
{
|
||||
signed int A[4] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001};
|
||||
signed int B[4] = {0x08000000,0x08000001,0xff0000ff,0xf0000001};
|
||||
signed int Answer[4] = {0,0xf7ffffff,0x0200fe01, 0xe0000001};
|
||||
signed int C[4];
|
||||
signed int A[8] = {0x08000000,0xffffffff,0xff0000ff,0xf0000001,
|
||||
0x08000000,0xffffffff,0xff0000ff,0xf0000001};
|
||||
signed int B[8] = {0x08000000,0x08000001,0xff0000ff,0xf0000001,
|
||||
0x08000000,0x08000001,0xff0000ff,0xf0000001};
|
||||
signed int Answer[8] = {0,0xf7ffffff,0x0200fe01, 0xe0000001,
|
||||
0,0xf7ffffff,0x0200fe01, 0xe0000001};
|
||||
signed int C[8];
|
||||
int i, j;
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
for (i=0; i<8; i++)
|
||||
C[i] = A[i] * B[i];
|
||||
for (i=0; i<4; i++)
|
||||
for (i=0; i<8; i++)
|
||||
if (C[i] != Answer[i])
|
||||
abort ();
|
||||
}
|
||||
|
|
|
@ -3,20 +3,18 @@
|
|||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define N 8
|
||||
|
||||
__attribute__ ((noinline))
|
||||
int main1 ()
|
||||
void main1 ()
|
||||
{
|
||||
int i, j;
|
||||
int ib[6] = {0,3,6,9,12,15};
|
||||
int ia[8][5][6];
|
||||
int ic[16][16][5][6];
|
||||
int ia[8][5][N+2];
|
||||
|
||||
/* Multidimensional array. Aligned. */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
ia[2][6][j] = 5;
|
||||
}
|
||||
|
@ -25,33 +23,48 @@ int main1 ()
|
|||
/* check results: */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (ia[2][6][j] != 5)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
void main2 ()
|
||||
{
|
||||
int i, j;
|
||||
int ia[8][5][N+2];
|
||||
|
||||
/* Multidimensional array. Aligned. */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
ia[3][6][j+2] = 5;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 2; j < 6; j++)
|
||||
for (j = 2; j < N+2; j++)
|
||||
{
|
||||
if (ia[3][6][j] != 5)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__attribute__ ((noinline))
|
||||
void main3 ()
|
||||
{
|
||||
int i, j;
|
||||
int ic[16][16][5][N+2];
|
||||
|
||||
/* Multidimensional array. Not aligned. */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
ic[2][1][6][j+1] = 5;
|
||||
}
|
||||
|
@ -60,22 +73,24 @@ int main1 ()
|
|||
/* check results: */
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
for (j = 0; j < 4; j++)
|
||||
for (j = 0; j < N; j++)
|
||||
{
|
||||
if (ic[2][1][6][j+1] != 5)
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
main1 ();
|
||||
main2 ();
|
||||
main3 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
#define N 12
|
||||
#define OFF 4
|
||||
|
||||
/* Check handling of accesses for which the "initial condition" -
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
|
||||
#define N 40
|
||||
#define N 64
|
||||
signed short image[N][N] __attribute__ ((__aligned__(16)));
|
||||
signed short block[N][N] __attribute__ ((__aligned__(16)));
|
||||
signed short out[N] __attribute__ ((__aligned__(16)));
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
|
||||
#define N 40
|
||||
#define N 64
|
||||
signed short image[N][N] __attribute__ ((__aligned__(16)));
|
||||
signed short block[N][N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
|
|
|
@ -20,10 +20,10 @@ int main1 ()
|
|||
|
||||
int i, j;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
s = 0;
|
||||
for (j=0; j<8; j+=4)
|
||||
for (j = 0; j < N; j += 4)
|
||||
s += C[j];
|
||||
A[i] = s;
|
||||
}
|
||||
|
@ -49,10 +49,10 @@ int main ()
|
|||
main1();
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < 8; i++)
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
s = 0;
|
||||
for (j=0; j<8; j+=4)
|
||||
for (j = 0; j < N; j += 4)
|
||||
s += C[j];
|
||||
if (A[i] != s)
|
||||
abort ();
|
||||
|
|
|
@ -4,12 +4,14 @@
|
|||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 4
|
||||
#define N 8
|
||||
|
||||
int main ()
|
||||
{
|
||||
unsigned int A[N] = { 0x08000000, 0x08000001, 0x0ff0000ff, 0xf0000001 };
|
||||
unsigned int B[N] = { 0x01000000, 0x01000000, 0x01fe0001f, 0x1e000000 };
|
||||
unsigned int A[N] = { 0x08000000, 0x08000001, 0x0ff0000ff, 0xf0000001,
|
||||
0x08000000, 0x08000001, 0x0ff0000ff, 0xf0000001 };
|
||||
unsigned int B[N] = { 0x01000000, 0x01000000, 0x01fe0001f, 0x1e000000,
|
||||
0x01000000, 0x01000000, 0x01fe0001f, 0x1e000000 };
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
|
|
@ -4,14 +4,14 @@
|
|||
extern char lanip[3][40];
|
||||
typedef struct
|
||||
{
|
||||
char *t[4];
|
||||
char *t[8];
|
||||
}tx_typ;
|
||||
|
||||
int set_names (void)
|
||||
{
|
||||
static tx_typ tt1;
|
||||
int ln;
|
||||
for (ln = 0; ln < 4; ln++)
|
||||
for (ln = 0; ln < 8; ln++)
|
||||
tt1.t[ln] = lanip[1];
|
||||
}
|
||||
|
||||
|
|
|
@ -1018,7 +1018,7 @@ basic_block *blocks_in_phiopt_order (void);
|
|||
void tree_ssa_lim (void);
|
||||
unsigned int tree_ssa_unswitch_loops (void);
|
||||
unsigned int canonicalize_induction_variables (void);
|
||||
unsigned int tree_unroll_loops_completely (bool);
|
||||
unsigned int tree_unroll_loops_completely (bool, bool);
|
||||
unsigned int tree_ssa_prefetch_arrays (void);
|
||||
unsigned int remove_empty_loops (void);
|
||||
void tree_ssa_iv_optimize (void);
|
||||
|
|
|
@ -290,6 +290,7 @@ extern struct gimple_opt_pass pass_if_conversion;
|
|||
extern struct gimple_opt_pass pass_loop_distribution;
|
||||
extern struct gimple_opt_pass pass_vectorize;
|
||||
extern struct gimple_opt_pass pass_complete_unroll;
|
||||
extern struct gimple_opt_pass pass_complete_unrolli;
|
||||
extern struct gimple_opt_pass pass_parallelize_loops;
|
||||
extern struct gimple_opt_pass pass_loop_prefetch;
|
||||
extern struct gimple_opt_pass pass_iv_optimize;
|
||||
|
|
|
@ -187,23 +187,20 @@ try_unroll_loop_completely (struct loop *loop,
|
|||
> (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS))
|
||||
return false;
|
||||
|
||||
if (ul == UL_NO_GROWTH)
|
||||
unr_insns = estimated_unrolled_size (ninsns, n_unroll);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
|
||||
fprintf (dump_file, " Estimated size after unrolling: %d\n",
|
||||
(int) unr_insns);
|
||||
}
|
||||
|
||||
if (ul == UL_NO_GROWTH
|
||||
&& unr_insns > ninsns)
|
||||
{
|
||||
unr_insns = estimated_unrolled_size (ninsns, n_unroll);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, " Loop size: %d\n", (int) ninsns);
|
||||
fprintf (dump_file, " Estimated size after unrolling: %d\n",
|
||||
(int) unr_insns);
|
||||
}
|
||||
|
||||
if (unr_insns > ninsns)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "Not unrolling loop %d:\n", loop->num);
|
||||
return false;
|
||||
}
|
||||
fprintf (dump_file, "Not unrolling loop %d.\n", loop->num);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -339,30 +336,45 @@ canonicalize_induction_variables (void)
|
|||
size of the code does not increase. */
|
||||
|
||||
unsigned int
|
||||
tree_unroll_loops_completely (bool may_increase_size)
|
||||
tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
|
||||
{
|
||||
loop_iterator li;
|
||||
struct loop *loop;
|
||||
bool changed = false;
|
||||
bool changed;
|
||||
enum unroll_level ul;
|
||||
|
||||
FOR_EACH_LOOP (li, loop, 0)
|
||||
do
|
||||
{
|
||||
if (may_increase_size && maybe_hot_bb_p (loop->header))
|
||||
ul = UL_ALL;
|
||||
else
|
||||
ul = UL_NO_GROWTH;
|
||||
changed |= canonicalize_loop_induction_variables (loop,
|
||||
false, ul,
|
||||
!flag_tree_loop_ivcanon);
|
||||
changed = false;
|
||||
|
||||
FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
|
||||
{
|
||||
if (may_increase_size && maybe_hot_bb_p (loop->header)
|
||||
/* Unroll outermost loops only if asked to do so or they do
|
||||
not cause code growth. */
|
||||
&& (unroll_outer
|
||||
|| loop_outer (loop_outer (loop))))
|
||||
ul = UL_ALL;
|
||||
else
|
||||
ul = UL_NO_GROWTH;
|
||||
changed |= canonicalize_loop_induction_variables
|
||||
(loop, false, ul, !flag_tree_loop_ivcanon);
|
||||
}
|
||||
|
||||
if (changed)
|
||||
{
|
||||
/* This will take care of removing completely unrolled loops
|
||||
from the loop structures so we can continue unrolling the loops
|
||||
that have now become innermost. */
|
||||
cleanup_tree_cfg ();
|
||||
|
||||
/* Clean up the information about numbers of iterations, since
|
||||
complete unrolling might have invalidated it. */
|
||||
scev_reset ();
|
||||
}
|
||||
}
|
||||
while (changed);
|
||||
|
||||
/* Clean up the information about numbers of iterations, since complete
|
||||
unrolling might have invalidated it. */
|
||||
scev_reset ();
|
||||
|
||||
if (changed)
|
||||
return TODO_cleanup_cfg;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -248,10 +248,15 @@ find_uses_to_rename_use (basic_block bb, tree use, bitmap *use_blocks,
|
|||
return;
|
||||
def_loop = def_bb->loop_father;
|
||||
|
||||
/* If the definition is not inside loop, it is not interesting. */
|
||||
/* If the definition is not inside a loop, it is not interesting. */
|
||||
if (!loop_outer (def_loop))
|
||||
return;
|
||||
|
||||
/* If the use is not outside of the loop it is defined in, it is not
|
||||
interesting. */
|
||||
if (flow_bb_inside_loop_p (def_loop, bb))
|
||||
return;
|
||||
|
||||
if (!use_blocks[ver])
|
||||
use_blocks[ver] = BITMAP_ALLOC (NULL);
|
||||
bitmap_set_bit (use_blocks[ver], bb->index);
|
||||
|
@ -592,7 +597,8 @@ tree_duplicate_loop_to_header_edge (struct loop *loop, edge e,
|
|||
return false;
|
||||
|
||||
#ifdef ENABLE_CHECKING
|
||||
verify_loop_closed_ssa ();
|
||||
if (loops_state_satisfies_p (LOOP_CLOSED_SSA))
|
||||
verify_loop_closed_ssa ();
|
||||
#endif
|
||||
|
||||
first_new_block = last_basic_block;
|
||||
|
|
|
@ -223,13 +223,16 @@ struct gimple_opt_pass pass_predcom =
|
|||
static unsigned int
|
||||
tree_vectorize (void)
|
||||
{
|
||||
if (number_of_loops () <= 1)
|
||||
return 0;
|
||||
|
||||
return vectorize_loops ();
|
||||
}
|
||||
|
||||
static bool
|
||||
gate_tree_vectorize (void)
|
||||
{
|
||||
return flag_tree_vectorize && number_of_loops () > 1;
|
||||
return flag_tree_vectorize;
|
||||
}
|
||||
|
||||
struct gimple_opt_pass pass_vectorize =
|
||||
|
@ -466,7 +469,7 @@ tree_complete_unroll (void)
|
|||
|
||||
return tree_unroll_loops_completely (flag_unroll_loops
|
||||
|| flag_peel_loops
|
||||
|| optimize >= 3);
|
||||
|| optimize >= 3, true);
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -495,6 +498,53 @@ struct gimple_opt_pass pass_complete_unroll =
|
|||
}
|
||||
};
|
||||
|
||||
/* Complete unrolling of inner loops. */
|
||||
|
||||
static unsigned int
|
||||
tree_complete_unroll_inner (void)
|
||||
{
|
||||
unsigned ret = 0;
|
||||
|
||||
loop_optimizer_init (LOOPS_NORMAL
|
||||
| LOOPS_HAVE_RECORDED_EXITS);
|
||||
if (number_of_loops () > 1)
|
||||
{
|
||||
scev_initialize ();
|
||||
ret = tree_unroll_loops_completely (optimize >= 3, false);
|
||||
free_numbers_of_iterations_estimates ();
|
||||
scev_finalize ();
|
||||
}
|
||||
loop_optimizer_finalize ();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool
|
||||
gate_tree_complete_unroll_inner (void)
|
||||
{
|
||||
return optimize >= 2;
|
||||
}
|
||||
|
||||
struct gimple_opt_pass pass_complete_unrolli =
|
||||
{
|
||||
{
|
||||
GIMPLE_PASS,
|
||||
"cunrolli", /* name */
|
||||
gate_tree_complete_unroll_inner, /* gate */
|
||||
tree_complete_unroll_inner, /* execute */
|
||||
NULL, /* sub */
|
||||
NULL, /* next */
|
||||
0, /* static_pass_number */
|
||||
TV_COMPLETE_UNROLL, /* tv_id */
|
||||
PROP_cfg | PROP_ssa, /* properties_required */
|
||||
0, /* properties_provided */
|
||||
0, /* properties_destroyed */
|
||||
0, /* todo_flags_start */
|
||||
TODO_dump_func | TODO_verify_loops
|
||||
| TODO_ggc_collect /* todo_flags_finish */
|
||||
}
|
||||
};
|
||||
|
||||
/* Parallelization. */
|
||||
|
||||
static bool
|
||||
|
|
Loading…
Add table
Reference in a new issue