Check TYPE_OVERFLOW_WRAPS for parloops reductions

2015-07-23  Tom de Vries  <tom@codesourcery.com>

	* tree-parloops.c (gather_scalar_reductions): Add arg to call to
	vect_force_simple_reduction.
	* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
	(vect_is_simple_reduction_1): Add and handle
	need_wrapping_integral_overflow parameter.
	(vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
	need_wrapping_integral_overflow parameter.
	(vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
	* tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.

	* gcc.dg/autopar/outer-4.c: Add xfail.
	* gcc.dg/autopar/outer-5.c: Same.
	* gcc.dg/autopar/outer-6.c: Same.
	* gcc.dg/autopar/reduc-2.c: Same.
	* gcc.dg/autopar/reduc-2char.c: Same.
	* gcc.dg/autopar/reduc-2short.c: Same.
	* gcc.dg/autopar/reduc-8.c: Same.
	* gcc.dg/autopar/uns-outer-4.c: New test.
	* gcc.dg/autopar/uns-outer-5.c: New test.
	* gcc.dg/autopar/uns-outer-6.c: New test.

From-SVN: r226107
This commit is contained in:
Tom de Vries 2015-07-23 12:17:52 +00:00 committed by Tom de Vries
parent f45dacba22
commit bcdaf37084
15 changed files with 208 additions and 30 deletions

View file

@ -1,3 +1,15 @@
2015-07-23 Tom de Vries <tom@codesourcery.com>
* tree-parloops.c (gather_scalar_reductions): Add arg to call to
vect_force_simple_reduction.
* tree-vect-loop.c (vect_analyze_scalar_cycles_1): Same.
(vect_is_simple_reduction_1): Add and handle
need_wrapping_integral_overflow parameter.
(vect_is_simple_reduction, vect_force_simple_reduction): Add and pass
need_wrapping_integral_overflow parameter.
(vectorizable_reduction): Add arg to call to vect_is_simple_reduction.
* tree-vectorizer.h (vect_force_simple_reduction): Add parameter to decl.
2015-07-23 Yuri Rumyantsev <ysrumyan@gmail.com>
PR tree-optimization/66926,66951

View file

@ -1,3 +1,16 @@
2015-07-23 Tom de Vries <tom@codesourcery.com>
* gcc.dg/autopar/outer-4.c: Add xfail.
* gcc.dg/autopar/outer-5.c: Same.
* gcc.dg/autopar/outer-6.c: Same.
* gcc.dg/autopar/reduc-2.c: Same.
* gcc.dg/autopar/reduc-2char.c: Same.
* gcc.dg/autopar/reduc-2short.c: Same.
* gcc.dg/autopar/reduc-8.c: Same.
* gcc.dg/autopar/uns-outer-4.c: New test.
* gcc.dg/autopar/uns-outer-5.c: New test.
* gcc.dg/autopar/uns-outer-6.c: New test.
2015-07-23 Richard Biener <rguenther@suse.de>
PR tree-optimization/66952

View file

@ -32,4 +32,4 @@ int main(void)
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */

View file

@ -45,4 +45,4 @@ int main(void)
}
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */

View file

@ -44,6 +44,6 @@ int main(void)
/* Check that outer loop is parallelized. */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" { xfail *-*-* } } } */

View file

@ -63,6 +63,6 @@ int main (void)
return 0;
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" } } */
/* { dg-final { scan-tree-dump-times "Detected reduction" 3 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 4 "parloops" { xfail *-*-* } } } */

View file

@ -60,7 +60,7 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */

View file

@ -59,6 +59,6 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */

View file

@ -84,5 +84,5 @@ int main (void)
}
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" } } */
/* { dg-final { scan-tree-dump-times "Detected reduction" 2 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 3 "parloops" { xfail *-*-* } } } */

View file

@ -0,0 +1,36 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
/* abort is declared here but is not called anywhere in this test.  */
void abort (void);
/* Receives the final value of the reduction computed by parloop.  */
unsigned int g_sum=0;
/* Input matrix summed by parloop.  Nothing in this file writes to it, so it
   keeps its static zero initialization.  */
unsigned int x[500][500];
/* Add up x[i][j] for all 0 <= i, j < N in an unsigned accumulator and
   store the total in g_sum.  The accumulator is unsigned, so the
   additions wrap on overflow.  The function is marked noinline.  */
void __attribute__((noinline))
parloop (int N)
{
int i, j;
unsigned int sum;
/* Double reduction is currently not supported, outer loop is not
parallelized. Inner reduction is detected, inner loop is
parallelized. */
sum = 0;
for (i = 0; i < N; i++)
for (j = 0; j < N; j++)
sum += x[i][j];
/* Publish the result in a global so the reduction has a visible use.  */
g_sum = sum;
}
/* Test driver: run the reduction over the full 500 x 500 matrix and
   return 0.  The matrix is not initialized by this function.  */
int
main (void)
{
parloop (500);
return 0;
}
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */

View file

@ -0,0 +1,49 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
/* abort is declared here but is not called anywhere in this test.  */
void abort (void);
/* Input matrix; every element is set to 1 by init, called from main.  */
unsigned int x[500][500];
/* Per-row sums written by parloop.  */
unsigned int y[500];
/* Receives the value of parloop's accumulator after its loops finish.  */
unsigned int g_sum=0;
/* Store 1 into element x[I][J].  The function is marked noinline.  */
void __attribute__((noinline))
init (int i, int j)
{
x[i][j]=1;
}
/* For each row i < N, add up the first N elements of x[i] in an unsigned
   accumulator and store that row sum in y[i].  After the loops, g_sum
   receives the accumulator's last value, i.e. the sum of the final row.
   NOTE: when N <= 0 the outer loop body never runs, so sum is read
   uninitialized by the final assignment; main only passes 500.  */
void __attribute__((noinline))
parloop (int N)
{
int i, j;
unsigned int sum;
/* Inner cycle is currently not supported, outer loop is not
parallelized. Inner reduction is detected, inner loop is
parallelized. */
for (i = 0; i < N; i++)
{
/* The accumulator is reset for every row.  */
sum = 0;
for (j = 0; j < N; j++)
sum += x[i][j];
y[i]=sum;
}
g_sum = sum;
}
/* Test driver: set every element of the 500 x 500 matrix to 1 through
   init, then run parloop over the whole matrix and return 0.  */
int
main (void)
{
int i, j;
for (i = 0; i < 500; i++)
for (j = 0; j < 500; j++)
init (i, j);
parloop (500);
return 0;
}
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" { xfail *-*-* } } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */

View file

@ -0,0 +1,51 @@
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops-details -fdump-tree-optimized" } */
/* abort is declared here but is not called anywhere in this test.  */
void abort (void);
/* Input matrix; every element is set to 1 by init, called from main.  */
unsigned int x[500][500];
/* Per-row scratch values written by parloop's inner loop.  */
unsigned int y[500];
/* Receives the final value of the reduction computed by parloop.  */
unsigned int g_sum=0;
/* Store 1 into element x[I][J].  The function is marked noinline.  */
void __attribute__((noinline))
init (int i, int j)
{
x[i][j]=1;
}
/* For each row i < N, the inner loop overwrites y[i] with each of
   x[i][0] .. x[i][N-1] in turn, so y[i] ends up holding the last one.
   That value is then added to an unsigned accumulator in the outer
   loop, and the total is stored in g_sum.  */
void __attribute__((noinline))
parloop (int N)
{
int i, j;
unsigned int sum;
/* Outer loop reduction, outer loop is parallelized. */
sum=0;
for (i = 0; i < N; i++)
{
/* The inner loop only stores to y[i]; it does not touch sum.  */
for (j = 0; j < N; j++)
y[i]=x[i][j];
sum += y[i];
}
g_sum = sum;
}
/* Test driver: set every element of the 500 x 500 matrix to 1 through
   init, then run parloop over the whole matrix and return 0.  */
int
main (void)
{
int i, j;
for (i = 0; i < 500; i++)
for (j = 0; j < 500; j++)
init (i, j);
parloop (500);
return 0;
}
/* Check that outer loop is parallelized. */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops" } } */
/* { dg-final { scan-tree-dump-times "parallelizing inner loop" 0 "parloops" } } */
/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */

View file

@ -2376,9 +2376,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list
if (!simple_iv (loop, loop, res, &iv, true)
&& simple_loop_info)
{
gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
phi, true,
&double_reduc);
gimple reduc_stmt
= vect_force_simple_reduction (simple_loop_info, phi, true,
&double_reduc, true);
if (reduc_stmt && !double_reduc)
build_new_reduction (reduction_list, reduc_stmt, phi);
}

View file

@ -715,7 +715,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
&double_reduc);
&double_reduc, false);
if (reduc_stmt)
{
if (double_reduc)
@ -2339,7 +2339,7 @@ vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
static gimple
vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
bool check_reduction, bool *double_reduc,
bool modify)
bool modify, bool need_wrapping_integral_overflow)
{
struct loop *loop = (gimple_bb (phi))->loop_father;
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
@ -2613,14 +2613,26 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
"reduction: unsafe fp math optimization: ");
return NULL;
}
else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
&& check_reduction)
else if (INTEGRAL_TYPE_P (type) && check_reduction)
{
/* Changing the order of operations changes the semantics. */
if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe int math optimization: ");
return NULL;
if (TYPE_OVERFLOW_TRAPS (type))
{
/* Changing the order of operations changes the semantics. */
if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe int math optimization"
" (overflow traps): ");
return NULL;
}
if (need_wrapping_integral_overflow && !TYPE_OVERFLOW_WRAPS (type))
{
/* Changing the order of operations changes the semantics. */
if (dump_enabled_p ())
report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
"reduction: unsafe int math optimization"
" (overflow doesn't wrap): ");
return NULL;
}
}
else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
{
@ -2749,10 +2761,12 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
static gimple
vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
bool check_reduction, bool *double_reduc)
bool check_reduction, bool *double_reduc,
bool need_wrapping_integral_overflow)
{
return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
double_reduc, false);
double_reduc, false,
need_wrapping_integral_overflow);
}
/* Wrapper around vect_is_simple_reduction_1, which will modify code
@ -2761,10 +2775,12 @@ vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
gimple
vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
bool check_reduction, bool *double_reduc)
bool check_reduction, bool *double_reduc,
bool need_wrapping_integral_overflow)
{
return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
double_reduc, true);
double_reduc, true,
need_wrapping_integral_overflow);
}
/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
@ -5074,7 +5090,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
}
gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
!nested_cycle, &dummy);
!nested_cycle, &dummy, false);
if (orig_stmt)
gcc_assert (tmp == orig_stmt
|| GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt);

View file

@ -1090,7 +1090,8 @@ extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
/* In tree-vect-loop.c. */
/* FORNOW: Used in tree-parloops.c. */
extern void destroy_loop_vec_info (loop_vec_info, bool);
extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *);
extern gimple vect_force_simple_reduction (loop_vec_info, gimple, bool, bool *,
bool);
/* Drive for loop analysis stage. */
extern loop_vec_info vect_analyze_loop (struct loop *);
/* Drive for loop transformation stage. */