re PR tree-optimization/41783 (r151561 (PRE fix) regresses zeusmp)
PR tree-optimization/41783 * tree-data-ref.c (toplevel): Include flags.h. (dump_data_dependence_relation): Also dump the inputs if the result will be unknown. (split_constant_offset_1): Look through some conversions. * tree-predcom.c (determine_roots_comp): Restart a new chain if the offset from last element is too large. (ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF. (reassociate_to_the_same_stmt): Handle vector registers. * tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations (e.g. conversions). * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add wide_prolog_niters argument, emit widening instructions. (vect_do_peeling_for_alignment): Adjust caller, use widened variant of the iteration count. * Makefile.in (tree-data-ref.o): Add $(FLAGS_H). testsuite/ * gfortran.dg/vect/fast-math-mgrid-resid.f: New. From-SVN: r156043
This commit is contained in:
parent
b3d7e1910f
commit
b61b1f1708
8 changed files with 154 additions and 19 deletions
|
@ -1,10 +1,29 @@
|
|||
2010-01-19 Michael Matz <matz@suse.de>
|
||||
|
||||
PR tree-optimization/41783
|
||||
* tree-data-ref.c (toplevel): Include flags.h.
|
||||
(dump_data_dependence_relation): Also dump the inputs if the
|
||||
result will be unknown.
|
||||
(split_constant_offset_1): Look through some conversions.
|
||||
* tree-predcom.c (determine_roots_comp): Restart a new chain if
|
||||
the offset from last element is too large.
|
||||
(ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF.
|
||||
(reassociate_to_the_same_stmt): Handle vector registers.
|
||||
* tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations
|
||||
(e.g. conversions).
|
||||
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add
|
||||
wide_prolog_niters argument, emit widening instructions.
|
||||
(vect_do_peeling_for_alignment): Adjust caller, use widened
|
||||
variant of the iteration count.
|
||||
* Makefile.in (tree-data-ref.o): Add $(FLAGS_H).
|
||||
|
||||
2010-01-19 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
|
||||
|
||||
PR target/38697
|
||||
* config/arm/neon-testgen.m (emit_automatics): New parameter
|
||||
PR target/38697
|
||||
* config/arm/neon-testgen.m (emit_automatics): New parameter
|
||||
features. Adjust for Fixed_return_reg feature.
|
||||
(test_intrinsic): Call emit_automatics with new feature.
|
||||
* config/arm/neon.ml: Update copyright years.
|
||||
* config/arm/neon.ml: Update copyright years.
|
||||
(features): New Fixed_return_reg feature.
|
||||
(ops): Update feature for Vget_low.
|
||||
|
||||
|
|
|
@ -2548,7 +2548,7 @@ tree-scalar-evolution.o: tree-scalar-evolution.c $(CONFIG_H) $(SYSTEM_H) \
|
|||
$(TIMEVAR_H) $(CFGLOOP_H) $(SCEV_H) $(TREE_PASS_H) $(FLAGS_H) \
|
||||
gt-tree-scalar-evolution.h
|
||||
tree-data-ref.o: tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
$(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
|
||||
$(GGC_H) $(FLAGS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
|
||||
$(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
|
||||
$(TREE_DATA_REF_H) $(TREE_PASS_H) langhooks.h
|
||||
sese.o: sese.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
|
||||
|
|
|
@ -1,3 +1,8 @@
|
|||
2010-01-19 Michael Matz <matz@suse.de>
|
||||
|
||||
PR tree-optimization/41783
|
||||
* gfortran.dg/vect/fast-math-mgrid-resid.f: New.
|
||||
|
||||
2010-01-19 Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
|
||||
|
||||
PR target/38697.
|
||||
|
|
44
gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
Normal file
44
gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
Normal file
|
@ -0,0 +1,44 @@
|
|||
! { dg-do compile }
|
||||
! { dg-require-effective-target vect_double }
|
||||
! { dg-options "-O3 -ffast-math -fpredictive-commoning -ftree-vectorize -fdump-tree-optimized" }
|
||||
|
||||
******* RESID COMPUTES THE RESIDUAL: R = V - AU
|
||||
*
|
||||
* THIS SIMPLE IMPLEMENTATION COSTS 27A + 4M PER RESULT, WHERE
|
||||
* A AND M DENOTE THE COSTS OF ADDITION (OR SUBTRACTION) AND
|
||||
* MULTIPLICATION, RESPECTIVELY. BY USING SEVERAL TWO-DIMENSIONAL
|
||||
* BUFFERS ONE CAN REDUCE THIS COST TO 13A + 4M IN THE GENERAL
|
||||
* CASE, OR 10A + 3M WHEN THE COEFFICIENT A(1) IS ZERO.
|
||||
*
|
||||
SUBROUTINE RESID(U,V,R,N,A)
|
||||
INTEGER N
|
||||
REAL*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3)
|
||||
INTEGER I3, I2, I1
|
||||
C
|
||||
DO 600 I3=2,N-1
|
||||
DO 600 I2=2,N-1
|
||||
DO 600 I1=2,N-1
|
||||
600 R(I1,I2,I3)=V(I1,I2,I3)
|
||||
> -A(0)*( U(I1, I2, I3 ) )
|
||||
> -A(1)*( U(I1-1,I2, I3 ) + U(I1+1,I2, I3 )
|
||||
> + U(I1, I2-1,I3 ) + U(I1, I2+1,I3 )
|
||||
> + U(I1, I2, I3-1) + U(I1, I2, I3+1) )
|
||||
> -A(2)*( U(I1-1,I2-1,I3 ) + U(I1+1,I2-1,I3 )
|
||||
> + U(I1-1,I2+1,I3 ) + U(I1+1,I2+1,I3 )
|
||||
> + U(I1, I2-1,I3-1) + U(I1, I2+1,I3-1)
|
||||
> + U(I1, I2-1,I3+1) + U(I1, I2+1,I3+1)
|
||||
> + U(I1-1,I2, I3-1) + U(I1-1,I2, I3+1)
|
||||
> + U(I1+1,I2, I3-1) + U(I1+1,I2, I3+1) )
|
||||
> -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
|
||||
> + U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
|
||||
> + U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
|
||||
> + U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
|
||||
C
|
||||
RETURN
|
||||
END
|
||||
! we want to check that predictive commoning did something on the
|
||||
! vectorized loop, which means we have to have exactly 13 vector
|
||||
! additions.
|
||||
! { dg-final { scan-tree-dump-times "vect_var\[^\\n\]*\\+ " 13 "optimized" } }
|
||||
! { dg-final { cleanup-tree-dump "vect" } }
|
||||
! { dg-final { cleanup-tree-dump "optimized" } }
|
|
@ -79,6 +79,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "ggc.h"
|
||||
#include "flags.h"
|
||||
#include "tree.h"
|
||||
|
||||
/* These RTL headers are needed for basic-block.h. */
|
||||
|
@ -380,6 +381,19 @@ dump_data_dependence_relation (FILE *outf,
|
|||
|
||||
if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
|
||||
{
|
||||
if (ddr)
|
||||
{
|
||||
dra = DDR_A (ddr);
|
||||
drb = DDR_B (ddr);
|
||||
if (dra)
|
||||
dump_data_reference (outf, dra);
|
||||
else
|
||||
fprintf (outf, " (nil)\n");
|
||||
if (drb)
|
||||
dump_data_reference (outf, drb);
|
||||
else
|
||||
fprintf (outf, " (nil)\n");
|
||||
}
|
||||
fprintf (outf, " (don't know)\n)\n");
|
||||
return;
|
||||
}
|
||||
|
@ -631,6 +645,24 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
|
|||
|
||||
return split_constant_offset_1 (type, var0, subcode, var1, var, off);
|
||||
}
|
||||
CASE_CONVERT:
|
||||
{
|
||||
/* We must not introduce undefined overflow, and we must not change the value.
|
||||
Hence we're okay if the inner type doesn't overflow to start with
|
||||
(pointer or signed), the outer type also is an integer or pointer
|
||||
and the outer precision is at least as large as the inner. */
|
||||
tree itype = TREE_TYPE (op0);
|
||||
if ((POINTER_TYPE_P (itype)
|
||||
|| (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
|
||||
&& TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
|
||||
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
|
||||
{
|
||||
split_constant_offset (op0, &var0, off);
|
||||
*var = fold_convert (type, var0);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
default:
|
||||
return false;
|
||||
|
|
|
@ -1180,6 +1180,7 @@ determine_roots_comp (struct loop *loop,
|
|||
unsigned i;
|
||||
dref a;
|
||||
chain_p chain = NULL;
|
||||
double_int last_ofs = double_int_zero;
|
||||
|
||||
/* Invariants are handled specially. */
|
||||
if (comp->comp_step == RS_INVARIANT)
|
||||
|
@ -1194,13 +1195,20 @@ determine_roots_comp (struct loop *loop,
|
|||
|
||||
for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
|
||||
{
|
||||
if (!chain || !DR_IS_READ (a->ref))
|
||||
if (!chain || !DR_IS_READ (a->ref)
|
||||
|| double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE),
|
||||
double_int_add (a->offset,
|
||||
double_int_neg (last_ofs))) <= 0)
|
||||
{
|
||||
if (nontrivial_chain_p (chain))
|
||||
VEC_safe_push (chain_p, heap, *chains, chain);
|
||||
{
|
||||
add_looparound_copies (loop, chain);
|
||||
VEC_safe_push (chain_p, heap, *chains, chain);
|
||||
}
|
||||
else
|
||||
release_chain (chain);
|
||||
chain = make_rooted_chain (a);
|
||||
last_ofs = a->offset;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1338,9 +1346,11 @@ ref_at_iteration (struct loop *loop, tree ref, int iter)
|
|||
else if (!INDIRECT_REF_P (ref))
|
||||
return unshare_expr (ref);
|
||||
|
||||
if (TREE_CODE (ref) == INDIRECT_REF)
|
||||
if (INDIRECT_REF_P (ref))
|
||||
{
|
||||
ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE);
|
||||
/* Take care for INDIRECT_REF and MISALIGNED_INDIRECT_REF at
|
||||
the same time. */
|
||||
ret = copy_node (ref);
|
||||
idx = TREE_OPERAND (ref, 0);
|
||||
idx_p = &TREE_OPERAND (ret, 0);
|
||||
}
|
||||
|
@ -2205,11 +2215,17 @@ reassociate_to_the_same_stmt (tree name1, tree name2)
|
|||
/* Insert the new statement combining NAME1 and NAME2 before S1, and
|
||||
combine it with the rhs of S1. */
|
||||
var = create_tmp_var (type, "predreastmp");
|
||||
if (TREE_CODE (type) == COMPLEX_TYPE
|
||||
|| TREE_CODE (type) == VECTOR_TYPE)
|
||||
DECL_GIMPLE_REG_P (var) = 1;
|
||||
add_referenced_var (var);
|
||||
new_name = make_ssa_name (var, NULL);
|
||||
new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);
|
||||
|
||||
var = create_tmp_var (type, "predreastmp");
|
||||
if (TREE_CODE (type) == COMPLEX_TYPE
|
||||
|| TREE_CODE (type) == VECTOR_TYPE)
|
||||
DECL_GIMPLE_REG_P (var) = 1;
|
||||
add_referenced_var (var);
|
||||
tmp_name = make_ssa_name (var, NULL);
|
||||
|
||||
|
|
|
@ -294,7 +294,7 @@ vect_update_interleaving_chain (struct data_reference *drb,
|
|||
static bool
|
||||
vect_equal_offsets (tree offset1, tree offset2)
|
||||
{
|
||||
bool res0, res1;
|
||||
bool res;
|
||||
|
||||
STRIP_NOPS (offset1);
|
||||
STRIP_NOPS (offset2);
|
||||
|
@ -303,16 +303,19 @@ vect_equal_offsets (tree offset1, tree offset2)
|
|||
return true;
|
||||
|
||||
if (TREE_CODE (offset1) != TREE_CODE (offset2)
|
||||
|| !BINARY_CLASS_P (offset1)
|
||||
|| !BINARY_CLASS_P (offset2))
|
||||
|| (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
|
||||
return false;
|
||||
|
||||
res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
|
||||
TREE_OPERAND (offset2, 0));
|
||||
res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
|
||||
TREE_OPERAND (offset2, 1));
|
||||
res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
|
||||
TREE_OPERAND (offset2, 0));
|
||||
|
||||
return (res0 && res1);
|
||||
if (!res || !BINARY_CLASS_P (offset1))
|
||||
return res;
|
||||
|
||||
res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
|
||||
TREE_OPERAND (offset2, 1));
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -1961,7 +1961,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
|
|||
use TYPE_VECTOR_SUBPARTS. */
|
||||
|
||||
static tree
|
||||
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
|
||||
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
|
||||
tree *wide_prolog_niters)
|
||||
{
|
||||
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
|
@ -2045,6 +2046,19 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
|
|||
add_referenced_var (var);
|
||||
stmts = NULL;
|
||||
iters_name = force_gimple_operand (iters, &stmts, false, var);
|
||||
if (types_compatible_p (sizetype, niters_type))
|
||||
*wide_prolog_niters = iters_name;
|
||||
else
|
||||
{
|
||||
gimple_seq seq = NULL;
|
||||
tree wide_iters = fold_convert (sizetype, iters);
|
||||
var = create_tmp_var (sizetype, "prolog_loop_niters");
|
||||
add_referenced_var (var);
|
||||
*wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
|
||||
var);
|
||||
if (seq)
|
||||
gimple_seq_add_seq (&stmts, seq);
|
||||
}
|
||||
|
||||
/* Insert stmt on loop preheader edge. */
|
||||
if (stmts)
|
||||
|
@ -2115,6 +2129,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
|
|||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
tree niters_of_prolog_loop, ni_name;
|
||||
tree n_iters;
|
||||
tree wide_prolog_niters;
|
||||
struct loop *new_loop;
|
||||
unsigned int th = 0;
|
||||
int min_profitable_iters;
|
||||
|
@ -2125,7 +2140,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
|
|||
initialize_original_copy_tables ();
|
||||
|
||||
ni_name = vect_build_loop_niters (loop_vinfo, NULL);
|
||||
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
|
||||
niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
|
||||
&wide_prolog_niters);
|
||||
|
||||
|
||||
/* Get profitability threshold for vectorized loop. */
|
||||
|
@ -2150,7 +2166,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
|
|||
TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
|
||||
|
||||
/* Update the init conditions of the access functions of all data refs. */
|
||||
vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
|
||||
vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
|
||||
|
||||
/* After peeling we have to reset scalar evolution analyzer. */
|
||||
scev_reset ();
|
||||
|
|
Loading…
Add table
Reference in a new issue