target.h (builtin_vectorization_cost): Add new target builtin.
2007-07-12 Dorit Nuzman <dorit@il.ibm.com> * target.h (builtin_vectorization_cost): Add new target builtin. * target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New. * tree-vectorizer.h (TARG_SCALAR_STMT_COST): New. (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New. * tree-vect-analyze.c (vect_analyze_slp_instance): Initialize uninitialized variables. * tree-vect-transform.c (cost_for_stmt): New function. (vect_estimate_min_profitable_iters): Call cost_for_stmt instead of using cost 1 for all scalar stmts. Be less conservative when estimating the number of prologue/epilogue iterations. Call targetm.vectorize.builtin_vectorization_cost. Return min_profitable_iters-1. (vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for initialization cost instead of TARG_VEC_STMT_COST. Use TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction epilogue code. Fix epilogue cost computation. * config/spu/spu.c (spu_builtin_vectorization_cost): New. (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement. * config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST): (TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST): (TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC_COST, TARG_VEC_LOAD_COST): (TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define. 2007-07-12 Dorit Nuzman <dorit@il.ibm.com> * gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now get vectorized. * gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops now get vectorized. * gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New. * gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New. 
* gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New. * gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New. * lib/target-supports.exp (check_effective_target_vect_int_mult): Add spu. From-SVN: r126584
This commit is contained in:
parent
e1c8221962
commit
e95b59d2ab
26 changed files with 937 additions and 18 deletions
|
@ -1,3 +1,28 @@
|
|||
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* target.h (builtin_vectorization_cost): Add new target builtin.
|
||||
* target-def.h (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): New.
|
||||
* tree-vectorizer.h (TARG_SCALAR_STMT_COST): New.
|
||||
(TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST): New.
|
||||
* tree-vect-analyze.c (vect_analyze_slp_instance): Initialize
|
||||
uninitialized variables.
|
||||
* tree-vect-transform.c (cost_for_stmt): New function.
|
||||
(vect_estimate_min_profitable_iters): Call cost_for_stmt instead of
|
||||
using cost 1 for all scalar stmts. Be less conservative when
|
||||
estimating the number of prologue/epilogue iterations. Call
|
||||
targetm.vectorize.builtin_vectorization_cost. Return
|
||||
min_profitable_iters-1.
|
||||
(vect_model_reduction_cost): Use TARG_SCALAR_TO_VEC_COST for
|
||||
initialization cost instead of TARG_VEC_STMT_COST. Use
|
||||
TARG_VEC_TO_SCALAR_COST instead of TARG_VEC_STMT_COST for reduction
|
||||
epilogue code. Fix epilogue cost computation.
|
||||
* config/spu/spu.c (spu_builtin_vectorization_cost): New.
|
||||
(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Implement.
|
||||
* config/spu/spu.h (TARG_COND_BRANCH_COST, TARG_SCALAR_STMT_COST):
|
||||
(TARG_SCALAR_LOAD_COST, TARG_SCALAR_STORE_COST, TARG_VEC_STMT_COST):
|
||||
(TARG_VEC_TO_SCALAR_COST, TARG_SCALAR_TO_VEC_COST, TARG_VEC_LOAD_COST):
|
||||
(TARG_VEC_UNALIGNED_LOAD_COST, TARG_VEC_STORE_COST): Define.
|
||||
|
||||
2007-07-12 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
* gimplify.c (gimplify_conversion): Make sure that the result
|
||||
|
|
|
@ -133,6 +133,7 @@ static void spu_encode_section_info (tree, rtx, int);
|
|||
static tree spu_builtin_mul_widen_even (tree);
|
||||
static tree spu_builtin_mul_widen_odd (tree);
|
||||
static tree spu_builtin_mask_for_load (void);
|
||||
static int spu_builtin_vectorization_cost (bool);
|
||||
|
||||
extern const char *reg_names[];
|
||||
rtx spu_compare_op0, spu_compare_op1;
|
||||
|
@ -261,6 +262,9 @@ const struct attribute_spec spu_attribute_table[];
|
|||
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
|
||||
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
|
||||
|
||||
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
|
||||
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
|
||||
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
void
|
||||
|
@ -5191,6 +5195,21 @@ spu_builtin_mask_for_load (void)
|
|||
return d->fndecl;
|
||||
}
|
||||
|
||||
/* Implement targetm.vectorize.builtin_vectorization_cost.
   Return the value to be added to the overhead of executing the
   vectorized version of a loop: -19 when RUNTIME_TEST is true,
   0 otherwise. */
|
||||
static int
|
||||
spu_builtin_vectorization_cost (bool runtime_test)
|
||||
{
|
||||
/* If the branch of the runtime test is taken - i.e. - the vectorized
|
||||
version is skipped - this incurs a misprediction cost (because the
|
||||
vectorized version is expected to be the fall-through). So we subtract
|
||||
the latency of a mispredicted branch from the costs that are incurred
|
||||
when the vectorized version is executed. */
|
||||
if (runtime_test)
|
||||
return -19;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
spu_init_expanders (void)
|
||||
{
|
||||
|
|
|
@ -541,6 +541,52 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
|
|||
#define ASM_OUTPUT_ALIGN(FILE,LOG) \
|
||||
do { if (LOG!=0) fprintf (FILE, "\t.align\t%d\n", (LOG)); } while (0)
|
||||
|
||||
|
||||
/* Model costs for the vectorizer. */
|
||||
|
||||
/* Cost of conditional branch. */
|
||||
#ifndef TARG_COND_BRANCH_COST
|
||||
#define TARG_COND_BRANCH_COST 6
|
||||
#endif
|
||||
|
||||
/* Cost of any scalar operation, excluding load and store. */
|
||||
#ifndef TARG_SCALAR_STMT_COST
|
||||
#define TARG_SCALAR_STMT_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of scalar load. */
|
||||
#undef TARG_SCALAR_LOAD_COST
|
||||
#define TARG_SCALAR_LOAD_COST 2 /* load + rotate */
|
||||
|
||||
/* Cost of scalar store. */
|
||||
#undef TARG_SCALAR_STORE_COST
|
||||
#define TARG_SCALAR_STORE_COST 10
|
||||
|
||||
/* Cost of any vector operation, excluding load, store,
|
||||
or vector to scalar operation. */
|
||||
#undef TARG_VEC_STMT_COST
|
||||
#define TARG_VEC_STMT_COST 1
|
||||
|
||||
/* Cost of vector to scalar operation. */
|
||||
#undef TARG_VEC_TO_SCALAR_COST
|
||||
#define TARG_VEC_TO_SCALAR_COST 1
|
||||
|
||||
/* Cost of scalar to vector operation. */
|
||||
#undef TARG_SCALAR_TO_VEC_COST
|
||||
#define TARG_SCALAR_TO_VEC_COST 1
|
||||
|
||||
/* Cost of aligned vector load. */
|
||||
#undef TARG_VEC_LOAD_COST
|
||||
#define TARG_VEC_LOAD_COST 1
|
||||
|
||||
/* Cost of misaligned vector load. */
|
||||
#undef TARG_VEC_UNALIGNED_LOAD_COST
|
||||
#define TARG_VEC_UNALIGNED_LOAD_COST 2
|
||||
|
||||
/* Cost of vector store. */
|
||||
#undef TARG_VEC_STORE_COST
|
||||
#define TARG_VEC_STORE_COST 1
|
||||
|
||||
|
||||
/* Misc */
|
||||
|
||||
|
|
|
@ -356,6 +356,7 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|||
default_builtin_vectorized_conversion
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 0
|
||||
|
||||
#define TARGET_VECTORIZE \
|
||||
{ \
|
||||
|
@ -363,7 +364,8 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|||
TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION, \
|
||||
TARGET_VECTORIZE_BUILTIN_CONVERSION, \
|
||||
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
|
||||
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \
|
||||
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD, \
|
||||
TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
|
||||
}
|
||||
|
||||
#define TARGET_DEFAULT_TARGET_FLAGS 0
|
||||
|
|
|
@ -413,6 +413,10 @@ struct gcc_target
|
|||
element-by-element products for the odd elements. */
|
||||
tree (* builtin_mul_widen_even) (tree);
|
||||
tree (* builtin_mul_widen_odd) (tree);
|
||||
|
||||
/* Returns the cost to be added to the overheads involved with
|
||||
executing the vectorized version of a loop. */
|
||||
int (*builtin_vectorization_cost) (bool);
|
||||
} vectorize;
|
||||
|
||||
/* The initial value of target_flags. */
|
||||
|
|
|
@ -1,3 +1,27 @@
|
|||
2007-07-12 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/costmodel/ppc/costmodel-vect-reduc-1char.c: Loops now
|
||||
get vectorized.
|
||||
* gcc.dg/vect/costmodel/i386/costmodel-vect-reduc-1char.c: Loops
|
||||
now get vectorized.
|
||||
* gcc.dg/vect/costmodel/spu/spu-costmodel-vect.exp: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-fast-math-vect-pr29925.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-iv-9.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-33.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c: New.
|
||||
* gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c: New.
|
||||
* lib/target-supports.exp (check_effective_target_vect_int_mult):
|
||||
Add spu.
|
||||
|
||||
2007-07-12 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR c++/30854
|
||||
|
|
|
@ -46,6 +46,6 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -46,6 +46,6 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 2 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_int_max } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
/* { dg-require-effective-target vect_float } */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
void interp_pitch(float *exc, float *interp, int pitch, int len)
|
||||
{
|
||||
int i,k;
|
||||
int maxj;
|
||||
|
||||
maxj=3;
|
||||
for (i=0;i<len;i++)
|
||||
{
|
||||
float tmp = 0;
|
||||
for (k=0;k<7;k++)
|
||||
{
|
||||
tmp += exc[i-pitch+k+maxj-6];
|
||||
}
|
||||
interp[i] = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
float *exc = calloc(126,sizeof(float));
|
||||
float *interp = calloc(80,sizeof(float));
|
||||
int pitch = -35;
|
||||
|
||||
check_vect ();
|
||||
|
||||
interp_pitch(exc, interp, pitch, 80);
|
||||
free(exc);
|
||||
free(interp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
51
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c
Normal file
51
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31a.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
struct t{
|
||||
int k[N];
|
||||
int l;
|
||||
};
|
||||
|
||||
struct s{
|
||||
char a; /* aligned */
|
||||
char b[N-1]; /* unaligned (offset 1B) */
|
||||
char c[N]; /* aligned (offset NB) */
|
||||
struct t d; /* aligned (offset 2NB) */
|
||||
struct t e; /* unaligned (offset 2N+4N+4 B) */
|
||||
};
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i;
|
||||
struct s tmp;
|
||||
|
||||
/* unaligned */
|
||||
for (i = 0; i < N/2; i++)
|
||||
{
|
||||
tmp.b[i] = 5;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i <N/2; i++)
|
||||
{
|
||||
if (tmp.b[i] != 5)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
50
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c
Normal file
50
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31b.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
struct t{
|
||||
int k[N];
|
||||
int l;
|
||||
};
|
||||
|
||||
struct s{
|
||||
char a; /* aligned */
|
||||
char b[N-1]; /* unaligned (offset 1B) */
|
||||
char c[N]; /* aligned (offset NB) */
|
||||
struct t d; /* aligned (offset 2NB) */
|
||||
struct t e; /* unaligned (offset 2N+4N+4 B) */
|
||||
};
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i;
|
||||
struct s tmp;
|
||||
|
||||
/* aligned */
|
||||
for (i = 0; i < N/2; i++)
|
||||
{
|
||||
tmp.c[i] = 6;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i <N/2; i++)
|
||||
{
|
||||
if (tmp.c[i] != 6)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
50
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c
Normal file
50
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31c.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
struct t{
|
||||
int k[N];
|
||||
int l;
|
||||
};
|
||||
|
||||
struct s{
|
||||
char a; /* aligned */
|
||||
char b[N-1]; /* unaligned (offset 1B) */
|
||||
char c[N]; /* aligned (offset NB) */
|
||||
struct t d; /* aligned (offset 2NB) */
|
||||
struct t e; /* unaligned (offset 2N+4N+4 B) */
|
||||
};
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i;
|
||||
struct s tmp;
|
||||
|
||||
/* aligned */
|
||||
for (i = 0; i < N/2; i++)
|
||||
{
|
||||
tmp.d.k[i] = 7;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i <N/2; i++)
|
||||
{
|
||||
if (tmp.d.k[i] != 7)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
51
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c
Normal file
51
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-31d.c
Normal file
|
@ -0,0 +1,51 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
struct t{
|
||||
int k[N];
|
||||
int l;
|
||||
};
|
||||
|
||||
struct s{
|
||||
char a; /* aligned */
|
||||
char b[N-1]; /* unaligned (offset 1B) */
|
||||
char c[N]; /* aligned (offset NB) */
|
||||
struct t d; /* aligned (offset 2NB) */
|
||||
struct t e; /* unaligned (offset 2N+4N+4 B) */
|
||||
};
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i;
|
||||
struct s tmp;
|
||||
|
||||
/* unaligned */
|
||||
for (i = 0; i < N/2; i++)
|
||||
{
|
||||
tmp.e.k[i] = 8;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i <N/2; i++)
|
||||
{
|
||||
if (tmp.e.k[i] != 8)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
40
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c
Normal file
40
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-33.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
struct test {
|
||||
char ca[N];
|
||||
};
|
||||
|
||||
extern struct test s;
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
s.ca[i] = 5;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (s.ca[i] != 5)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
49
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c
Normal file
49
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68a.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
struct s{
|
||||
int m;
|
||||
int n[N][N][N];
|
||||
};
|
||||
|
||||
struct test1{
|
||||
struct s a; /* array a.n is unaligned */
|
||||
int b;
|
||||
int c;
|
||||
struct s e; /* array e.n is aligned */
|
||||
};
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i,j;
|
||||
struct test1 tmp1;
|
||||
|
||||
/* 1. unaligned */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
tmp1.a.n[1][2][i] = 5;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i <N; i++)
|
||||
{
|
||||
if (tmp1.a.n[1][2][i] != 5)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
49
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c
Normal file
49
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68b.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
struct s{
|
||||
int m;
|
||||
int n[N][N][N];
|
||||
};
|
||||
|
||||
struct test1{
|
||||
struct s a; /* array a.n is unaligned */
|
||||
int b;
|
||||
int c;
|
||||
struct s e; /* array e.n is aligned */
|
||||
};
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i,j;
|
||||
struct test1 tmp1;
|
||||
|
||||
/* 2. aligned */
|
||||
for (i = 3; i < N-1; i++)
|
||||
{
|
||||
tmp1.a.n[1][2][i] = 6;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 3; i < N-1; i++)
|
||||
{
|
||||
if (tmp1.a.n[1][2][i] != 6)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
49
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c
Normal file
49
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68c.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
struct s{
|
||||
int m;
|
||||
int n[N][N][N];
|
||||
};
|
||||
|
||||
struct test1{
|
||||
struct s a; /* array a.n is unaligned */
|
||||
int b;
|
||||
int c;
|
||||
struct s e; /* array e.n is aligned */
|
||||
};
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i,j;
|
||||
struct test1 tmp1;
|
||||
|
||||
/* 3. aligned */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
tmp1.e.n[1][2][i] = 7;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (tmp1.e.n[1][2][i] != 7)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
50
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c
Normal file
50
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-68d.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 20
|
||||
|
||||
struct s{
|
||||
int m;
|
||||
int n[N][N][N];
|
||||
};
|
||||
|
||||
struct test1{
|
||||
struct s a; /* array a.n is unaligned */
|
||||
int b;
|
||||
int c;
|
||||
struct s e; /* array e.n is aligned */
|
||||
};
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i,j;
|
||||
struct test1 tmp1;
|
||||
|
||||
/* 4. unaligned */
|
||||
for (i = 3; i < N-3; i++)
|
||||
{
|
||||
tmp1.e.n[1][2][i] = 8;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 3; i <N-3; i++)
|
||||
{
|
||||
if (tmp1.e.n[1][2][i] != 8)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
47
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c
Normal file
47
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76a.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
#define OFF 4
|
||||
|
||||
/* Check handling of accesses for which the "initial condition" -
|
||||
the expression that represents the first location accessed - is
|
||||
more involved than just an ssa_name. */
|
||||
|
||||
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
|
||||
|
||||
int main1 (int *pib)
|
||||
{
|
||||
int i;
|
||||
int ia[N+OFF];
|
||||
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
|
||||
|
||||
for (i = OFF; i < N; i++)
|
||||
{
|
||||
ia[i] = pib[i - OFF];
|
||||
}
|
||||
|
||||
|
||||
/* check results: */
|
||||
for (i = OFF; i < N; i++)
|
||||
{
|
||||
if (ia[i] != pib[i - OFF])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (&ib[OFF]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
47
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c
Normal file
47
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76b.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
#define OFF 4
|
||||
|
||||
/* Check handling of accesses for which the "initial condition" -
|
||||
the expression that represents the first location accessed - is
|
||||
more involved than just an ssa_name. */
|
||||
|
||||
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
|
||||
|
||||
int main1 (int *pib)
|
||||
{
|
||||
int i;
|
||||
int ia[N+OFF];
|
||||
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
|
||||
|
||||
for (i = OFF; i < N; i++)
|
||||
{
|
||||
pib[i - OFF] = ic[i];
|
||||
}
|
||||
|
||||
|
||||
/* check results: */
|
||||
for (i = OFF; i < N; i++)
|
||||
{
|
||||
if (pib[i - OFF] != ic[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (&ib[OFF]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorization not profitable" 0 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
47
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c
Normal file
47
gcc/testsuite/gcc.dg/vect/costmodel/spu/costmodel-vect-76c.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
#define OFF 4
|
||||
|
||||
/* Check handling of accesses for which the "initial condition" -
|
||||
the expression that represents the first location accessed - is
|
||||
more involved than just an ssa_name. */
|
||||
|
||||
int ib[N+OFF] __attribute__ ((__aligned__(16))) = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
|
||||
|
||||
int main1 (int *pib)
|
||||
{
|
||||
int i;
|
||||
int ia[N+OFF];
|
||||
int ic[N+OFF] = {0, 1, 3, 5, 7, 11, 13, 17, 0, 2, 6, 10};
|
||||
|
||||
for (i = OFF; i < N; i++)
|
||||
{
|
||||
ia[i] = ic[i - OFF];
|
||||
}
|
||||
|
||||
|
||||
/* check results: */
|
||||
for (i = OFF; i < N; i++)
|
||||
{
|
||||
if (ia[i] != ic[i - OFF])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (&ib[OFF]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -0,0 +1,38 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "../../tree-vect.h"
|
||||
|
||||
#define N 26
|
||||
int a[N];
|
||||
|
||||
int main1 (int X)
|
||||
{
|
||||
int s = X;
|
||||
int i;
|
||||
|
||||
/* vectorization of reduction with induction. */
|
||||
for (i = 0; i < N; i++)
|
||||
s += (i + a[i]);
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int s, i;
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
a[i] = 2*i;
|
||||
|
||||
s = main1 (3);
|
||||
if (s != 978)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_int_mult } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target {! vect_int_mult } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
@ -0,0 +1,69 @@
|
|||
# Copyright (C) 1997, 2004, 2005, 2006 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
# GCC testsuite that uses the `dg.exp' driver.
|
||||
|
||||
# Load support procs.
|
||||
load_lib gcc-dg.exp
|
||||
|
||||
# Exit immediately if this isn't an spu target.
|
||||
if { ![istarget spu*-*-*] } then {
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
# Set up flags used for tests that don't specify options.
|
||||
set DEFAULT_VECTCFLAGS ""
|
||||
|
||||
# These flags are used for all targets.
|
||||
lappend DEFAULT_VECTCFLAGS "-O2" "-ftree-vectorize" "-fvect-cost-model"
|
||||
|
||||
# If the target system supports vector instructions, the default action
|
||||
# for a test is 'run', otherwise it's 'compile'. Save current default.
|
||||
# Executing vector instructions on a system without hardware vector support
|
||||
# is also disabled by a call to check_vect, but disabling execution here is
|
||||
# more efficient.
|
||||
global dg-do-what-default
|
||||
set save-dg-do-what-default ${dg-do-what-default}
|
||||
|
||||
set dg-do-what-default run
|
||||
|
||||
# Initialize `dg'.
|
||||
dg-init
|
||||
|
||||
lappend DEFAULT_VECTCFLAGS "-fdump-tree-vect-details"
|
||||
|
||||
# Main loop.
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-pr*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-vect-*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
|
||||
#### Tests with special options
|
||||
global SAVED_DEFAULT_VECTCFLAGS
|
||||
set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
|
||||
|
||||
# -ffast-math tests
|
||||
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
|
||||
lappend DEFAULT_VECTCFLAGS "-ffast-math"
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/costmodel-fast-math-vect*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
|
||||
# Clean up.
|
||||
set dg-do-what-default ${save-dg-do-what-default}
|
||||
|
||||
# All done.
|
||||
dg-finish
|
|
@ -2039,6 +2039,7 @@ proc check_effective_target_vect_int_mult { } {
|
|||
} else {
|
||||
set et_vect_int_mult_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget spu-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_int_mult_saved 1
|
||||
|
|
|
@ -74,6 +74,34 @@ static void vect_update_inits_of_drs (loop_vec_info, tree);
|
|||
static int vect_min_worthwhile_factor (enum tree_code);
|
||||
|
||||
|
||||
/* Function cost_for_stmt

   Return the cost of the scalar statement STMT, selected by the kind
   recorded in its stmt_vec_info (STMT_VINFO_TYPE): TARG_SCALAR_LOAD_COST
   for loads, TARG_SCALAR_STORE_COST for stores, and TARG_SCALAR_STMT_COST
   for every other kind listed below.  An undefined or unlisted kind
   reaches gcc_unreachable.  */
static int
|
||||
cost_for_stmt (tree stmt)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
|
||||
switch (STMT_VINFO_TYPE (stmt_info))
|
||||
{
|
||||
case load_vec_info_type:
|
||||
return TARG_SCALAR_LOAD_COST;
|
||||
case store_vec_info_type:
|
||||
return TARG_SCALAR_STORE_COST;
|
||||
case op_vec_info_type:
|
||||
case condition_vec_info_type:
|
||||
case assignment_vec_info_type:
|
||||
case reduc_vec_info_type:
|
||||
case induc_vec_info_type:
|
||||
case type_promotion_vec_info_type:
|
||||
case type_demotion_vec_info_type:
|
||||
case type_conversion_vec_info_type:
|
||||
case call_vec_info_type:
|
||||
return TARG_SCALAR_STMT_COST;
|
||||
case undef_vec_info_type:
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_estimate_min_profitable_iters
|
||||
|
||||
Return the number of iterations required for the vector version of the
|
||||
|
@ -138,7 +166,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
if (!STMT_VINFO_RELEVANT_P (stmt_info)
|
||||
&& !STMT_VINFO_LIVE_P (stmt_info))
|
||||
continue;
|
||||
scalar_single_iter_cost++;
|
||||
scalar_single_iter_cost += cost_for_stmt (stmt);
|
||||
vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
|
||||
vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
|
||||
}
|
||||
|
@ -148,7 +176,7 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
loop.
|
||||
|
||||
FORNOW: If we don't know the value of peel_iters for prologue or epilogue
|
||||
at compile-time - we assume the worst.
|
||||
at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
|
||||
|
||||
TODO: Build an expression that represents peel_iters for prologue and
|
||||
epilogue to be used in a run-time test. */
|
||||
|
@ -157,17 +185,17 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
|
||||
if (byte_misalign < 0)
|
||||
{
|
||||
peel_iters_prologue = vf - 1;
|
||||
peel_iters_prologue = (vf - 1)/2;
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "cost model: "
|
||||
"prologue peel iters set conservatively.");
|
||||
"prologue peel iters set to (vf-1)/2.");
|
||||
|
||||
/* If peeling for alignment is unknown, loop bound of main loop becomes
|
||||
unknown. */
|
||||
peel_iters_epilogue = vf - 1;
|
||||
peel_iters_epilogue = (vf - 1)/2;
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "cost model: "
|
||||
"epilogue peel iters set conservatively because "
|
||||
"epilogue peel iters set to (vf-1)/2 because "
|
||||
"peeling for alignment is unknown .");
|
||||
}
|
||||
else
|
||||
|
@ -186,10 +214,10 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
|
||||
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
|
||||
{
|
||||
peel_iters_epilogue = vf - 1;
|
||||
peel_iters_epilogue = (vf - 1)/2;
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "cost model: "
|
||||
"epilogue peel iters set conservatively because "
|
||||
"epilogue peel iters set to (vf-1)/2 because "
|
||||
"loop iterations are unknown .");
|
||||
}
|
||||
else
|
||||
|
@ -229,6 +257,26 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
|
||||
+ (peel_iters_epilogue * scalar_single_iter_cost);
|
||||
|
||||
/* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
|
||||
information we provide for the target is whether testing against the
|
||||
threshold involves a runtime test. */
|
||||
if (targetm.vectorize.builtin_vectorization_cost)
|
||||
{
|
||||
bool runtime_test = false;
|
||||
|
||||
/* If the number of iterations is unknown, or the
|
||||
peeling-for-misalignment amount is unknown, we will have to generate
|
||||
a runtime test to test the loop count against the threshold. */
|
||||
if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||
|| (byte_misalign < 0))
|
||||
runtime_test = true;
|
||||
vec_outside_cost +=
|
||||
targetm.vectorize.builtin_vectorization_cost (runtime_test);
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
|
||||
targetm.vectorize.builtin_vectorization_cost (runtime_test));
|
||||
}
|
||||
|
||||
/* Calculate number of iterations required to make the vector version
|
||||
profitable, relative to the loop bodies only. The following condition
|
||||
must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
|
||||
|
@ -280,7 +328,14 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
|
|||
min_profitable_iters < vf ? vf : min_profitable_iters);
|
||||
}
|
||||
|
||||
return min_profitable_iters < vf ? vf : min_profitable_iters;
|
||||
min_profitable_iters =
|
||||
min_profitable_iters < vf ? vf : min_profitable_iters;
|
||||
|
||||
/* Because the condition we create is:
|
||||
if (niters <= min_profitable_iters)
|
||||
then skip the vectorized loop. */
|
||||
min_profitable_iters--;
|
||||
return min_profitable_iters;
|
||||
}
|
||||
|
||||
|
||||
|
@ -321,7 +376,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
|
|||
code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
|
||||
|
||||
/* Add in cost for initial definition. */
|
||||
outer_cost += TARG_VEC_STMT_COST;
|
||||
outer_cost += TARG_SCALAR_TO_VEC_COST;
|
||||
|
||||
/* Determine cost of epilogue code.
|
||||
|
||||
|
@ -341,11 +396,13 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
|
|||
optab = optab_for_tree_code (code, vectype);
|
||||
|
||||
/* We have a whole vector shift available. */
|
||||
if (!VECTOR_MODE_P (mode)
|
||||
|| optab->handlers[mode].insn_code == CODE_FOR_nothing)
|
||||
if (VECTOR_MODE_P (mode)
|
||||
&& optab->handlers[mode].insn_code != CODE_FOR_nothing
|
||||
&& vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
|
||||
/* Final reduction via vector shifts and the reduction operator. Also
|
||||
requires scalar extract. */
|
||||
outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST);
|
||||
outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
|
||||
+ TARG_VEC_TO_SCALAR_COST);
|
||||
else
|
||||
/* Use extracts and reduction op for final reduction. For N elements,
|
||||
we have N extracts and N-1 reduction ops. */
|
||||
|
|
|
@ -326,6 +326,21 @@ typedef struct _stmt_vec_info {
|
|||
#define TARG_COND_BRANCH_COST 3
|
||||
#endif
|
||||
|
||||
/* Cost of any scalar operation, excluding load and store. */
|
||||
#ifndef TARG_SCALAR_STMT_COST
|
||||
#define TARG_SCALAR_STMT_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of scalar load. */
|
||||
#ifndef TARG_SCALAR_LOAD_COST
|
||||
#define TARG_SCALAR_LOAD_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of scalar store. */
|
||||
#ifndef TARG_SCALAR_STORE_COST
|
||||
#define TARG_SCALAR_STORE_COST 1
|
||||
#endif
|
||||
|
||||
/* Cost of any vector operation, excluding load, store or vector to scalar
|
||||
operation. */
|
||||
#ifndef TARG_VEC_STMT_COST
|
||||
|
|
Loading…
Add table
Reference in a new issue