tree-vectorizer.c: Depend on langhooks.h.
* tree-vectorizer.c: Depend on langhooks.h. (supportable_widening_operation): Add two arguments. Support double type conversions. (supportable_narrowing_operation): Likewise. * tree-vectorizer.h (supportable_widening_operation): Add two arguments. (supportable_narrowing_operation): Likewise. * tree-vect-patterns.c (vect_recog_widen_mult_pattern) : Call supportable_widening_operation with correct arguments. * tree-vect-transform.c (vectorizable_conversion): Likewise. (vectorizable_type_demotion): Support double type conversions. (vectorizable_type_promotion): Likewise. * Makefile.in (tree-vectorizer.o): Depend on langhooks.h. From-SVN: r138988
This commit is contained in:
parent
62641193ee
commit
ad2dd72a0e
22 changed files with 510 additions and 54 deletions
|
@ -1,3 +1,19 @@
|
|||
2008-08-12 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* tree-vectorizer.c: Depend on langhooks.h.
|
||||
(supportable_widening_operation): Add two arguments. Support double
|
||||
type conversions.
|
||||
(supportable_narrowing_operation): Likewise.
|
||||
* tree-vectorizer.h (supportable_widening_operation): Add two
|
||||
arguments.
|
||||
(supportable_narrowing_operation): Likewise.
|
||||
* tree-vect-patterns.c (vect_recog_widen_mult_pattern) : Call
|
||||
supportable_widening_operation with correct arguments.
|
||||
* tree-vect-transform.c (vectorizable_conversion): Likewise.
|
||||
(vectorizable_type_demotion): Support double type conversions.
|
||||
(vectorizable_type_promotion): Likewise.
|
||||
* Makefile.in (tree-vectorizer.o): Depend on langhooks.h.
|
||||
|
||||
2008-08-11 Michael Matz <matz@suse.de>
|
||||
|
||||
* i386/i386.c (override_options): Move initialisation from
|
||||
|
|
|
@ -2344,7 +2344,7 @@ tree-vectorizer.o: tree-vectorizer.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
|||
$(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
|
||||
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
|
||||
tree-pass.h $(EXPR_H) $(RECOG_H) tree-vectorizer.h $(TREE_DATA_REF_H) $(SCEV_H) \
|
||||
$(INPUT_H) $(TARGET_H) $(CFGLAYOUT_H) $(TOPLEV_H) tree-chrec.h
|
||||
$(INPUT_H) $(TARGET_H) $(CFGLAYOUT_H) $(TOPLEV_H) tree-chrec.h langhooks.h
|
||||
tree-loop-linear.o: tree-loop-linear.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
|
||||
$(TM_H) $(GGC_H) $(OPTABS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) \
|
||||
$(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
|
||||
|
|
|
@ -1,3 +1,25 @@
|
|||
2008-08-12 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-multitypes-12.c: New.
|
||||
* gcc.dg/vect/vect-multitypes-13.c, gcc.dg/vect/vect-multitypes-14.c,
|
||||
gcc.dg/vect/vect-multitypes-15.c : Likewise.
|
||||
* gcc.dg/vect/vect-reduc-dot-u8a.c: Avoid vectorization of the
|
||||
init loop.
|
||||
* gcc.dg/vect/vect-72.c, gcc.dg/vect/vect-strided-store-a-u8-i2.c:
|
||||
Likewise.
|
||||
* gcc.dg/vect/vect-reduc-dot-u8b.c: Avoid vectorization of the init
|
||||
loop.
|
||||
Should be vectorizable on targets that support vector unpack.
|
||||
* gcc.dg/vect/vect-widen-mult-u8.c,
|
||||
gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c,
|
||||
gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c: Likewise.
|
||||
* gcc.dg/vect/vect-35.c: Should be vectorizable on targets that
|
||||
support vector pack. Avoid vectorization of the init loop.
|
||||
* gcc.dg/vect/vect-reduc-pattern-1b.c: Should be vectorizable on
|
||||
targets that support vector unpack.
|
||||
* gcc.dg/vect/vect-reduc-pattern-2b.c,
|
||||
gcc.dg/vect/vect-reduc-dot-s8c.c, gcc.dg/vect/vect-112.c: Likewise.
|
||||
|
||||
2008-08-11 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR rtl-optimization/36998
|
||||
|
|
|
@ -32,7 +32,7 @@ int main (void)
|
|||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
||||
|
|
|
@ -18,6 +18,8 @@ int main1 ()
|
|||
for (i = 0; i < N; i++)
|
||||
{
|
||||
s.b[i] = 3*i;
|
||||
if (i%3 == 0)
|
||||
s.b[i] = 3*i;
|
||||
}
|
||||
|
||||
/* Dependence analysis fails cause s.a and s.b may overlap.
|
||||
|
|
|
@ -18,6 +18,9 @@ int main1 ()
|
|||
for (i=0; i < N+1; i++)
|
||||
{
|
||||
ib[i] = i;
|
||||
/* Avoid vectorization. */
|
||||
if (i%3 == 0)
|
||||
ib[i] = 5;
|
||||
}
|
||||
|
||||
for (i = 1; i < N+1; i++)
|
||||
|
|
44
gcc/testsuite/gcc.dg/vect/vect-multitypes-12.c
Normal file
44
gcc/testsuite/gcc.dg/vect/vect-multitypes-12.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
char x[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
foo (int len, int *z) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
z[i] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main (void)
|
||||
{
|
||||
char i;
|
||||
int z[N+4];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
|
||||
foo (N,z+2);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (z[i+2] != x[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_unpack } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
63
gcc/testsuite/gcc.dg/vect/vect-multitypes-13.c
Normal file
63
gcc/testsuite/gcc.dg/vect/vect-multitypes-13.c
Normal file
|
@ -0,0 +1,63 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned char uX[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned int uresult[N];
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
int result[N];
|
||||
|
||||
/* Unsigned type promotion (qi->si) */
|
||||
__attribute__ ((noinline)) int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
uresult[i] = (unsigned int)uX[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Signed type promotion (qi->si) */
|
||||
__attribute__ ((noinline)) int
|
||||
foo2(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result[i] = (int)X[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = 16-i;
|
||||
uX[i] = 16-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (uresult[i] != (unsigned int)uX[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
foo2 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != (int)X[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
50
gcc/testsuite/gcc.dg/vect/vect-multitypes-14.c
Normal file
50
gcc/testsuite/gcc.dg/vect/vect-multitypes-14.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned char uX[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char uresultX[N];
|
||||
unsigned int uY[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char uresultY[N];
|
||||
|
||||
/* Unsigned type demotion (si->qi) */
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
uresultX[i] = uX[i];
|
||||
uresultY[i] = (unsigned char)uY[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
uX[i] = 16-i;
|
||||
uY[i] = 16-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (uresultX[i] != uX[i])
|
||||
abort ();
|
||||
if (uresultY[i] != (unsigned char)uY[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
50
gcc/testsuite/gcc.dg/vect/vect-multitypes-15.c
Normal file
50
gcc/testsuite/gcc.dg/vect/vect-multitypes-15.c
Normal file
|
@ -0,0 +1,50 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 43680
|
||||
#define DOT2 -20832
|
||||
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
signed char Y[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char CX[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
__attribute__ ((noinline)) void
|
||||
foo1(int len) {
|
||||
int i;
|
||||
int result1 = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result1 += (X[i] * Y[i]);
|
||||
CX[i] = 5;
|
||||
}
|
||||
|
||||
if (result1 != DOT1)
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i, dot1, dot2;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
CX[i] = i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_sdot_hi || vect_unpack } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -11,8 +11,7 @@ signed char X[N] __attribute__ ((__aligned__(16)));
|
|||
signed char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->int->int dot product.
|
||||
Not detected as a dot-product pattern.
|
||||
Currently fails to be vectorized due to presence of type conversions. */
|
||||
Not detected as a dot-product pattern. */
|
||||
__attribute__ ((noinline)) int
|
||||
foo3(int len) {
|
||||
int i;
|
||||
|
@ -42,6 +41,5 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -44,6 +44,8 @@ int main (void)
|
|||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
if (i%5 == 0)
|
||||
X[i] = i;
|
||||
}
|
||||
|
||||
dot = foo (N);
|
||||
|
|
|
@ -39,6 +39,9 @@ int main (void)
|
|||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
/* Avoid vectorization. */
|
||||
if (i%100 == 0)
|
||||
X[i] = i;
|
||||
}
|
||||
|
||||
dot = foo (N);
|
||||
|
@ -54,7 +57,7 @@ int main (void)
|
|||
targets that support accumulation into int (powerpc, ia64) we'd have:
|
||||
dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_udot_qi || vect_widen_mult_qi_to_hi } }
|
||||
*/
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target vect_widen_mult_qi_to_hi} } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
|
|
@ -35,6 +35,6 @@ main (void)
|
|||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_sum_qi_to_si } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_widen_sum_qi_to_si } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_sum_qi_to_si || vect_unpack } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { { ! vect_widen_sum_qi_to_si } && { ! vect_unpack } } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -35,6 +35,6 @@ main (void)
|
|||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_sum_qi_to_si } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_widen_sum_qi_to_si } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_sum_qi_to_si && vect_unpack } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { { ! vect_widen_sum_qi_to_si } && { ! vect_unpack } } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -24,8 +24,8 @@ main1 ()
|
|||
{
|
||||
a[i] = i;
|
||||
b[i] = i * 2;
|
||||
if (a[i] == 178)
|
||||
abort();
|
||||
if (i%3 == 0)
|
||||
a[i] = 10;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
|
|
|
@ -28,6 +28,8 @@ int main (void)
|
|||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
if (i%4 == 0)
|
||||
X[i] = 5;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
@ -40,6 +42,6 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_qi_to_hi } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
|
|
@ -40,6 +40,8 @@ int main (void)
|
|||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
if (i%5 == 0)
|
||||
X[i] = i;
|
||||
}
|
||||
|
||||
dot = foo (N);
|
||||
|
@ -57,6 +59,6 @@ int main (void)
|
|||
dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sdot_qi } }
|
||||
*/
|
||||
/* In the meantime expect: */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_qi_to_hi } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -374,6 +374,7 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
|
|||
tree dummy;
|
||||
tree var;
|
||||
enum tree_code dummy_code;
|
||||
bool dummy_bool;
|
||||
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
return NULL;
|
||||
|
@ -414,7 +415,7 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
|
|||
if (!vectype
|
||||
|| !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, vectype,
|
||||
&dummy, &dummy, &dummy_code,
|
||||
&dummy_code))
|
||||
&dummy_code, &dummy_bool, &dummy))
|
||||
return NULL;
|
||||
|
||||
*type_in = vectype;
|
||||
|
|
|
@ -3457,6 +3457,8 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
VEC(tree,heap) *vec_oprnds0 = NULL;
|
||||
tree vop0;
|
||||
tree integral_type;
|
||||
tree dummy;
|
||||
bool dummy_bool;
|
||||
|
||||
/* Is STMT a vectorizable conversion? */
|
||||
|
||||
|
@ -3547,10 +3549,11 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
|| (modifier == WIDEN
|
||||
&& !supportable_widening_operation (code, stmt, vectype_in,
|
||||
&decl1, &decl2,
|
||||
&code1, &code2))
|
||||
&code1, &code2,
|
||||
&dummy_bool, &dummy))
|
||||
|| (modifier == NARROW
|
||||
&& !supportable_narrowing_operation (code, stmt, vectype_in,
|
||||
&code1)))
|
||||
&code1, &dummy_bool, &dummy)))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "conversion not supported by target.");
|
||||
|
@ -4268,6 +4271,10 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
int ncopies;
|
||||
int j;
|
||||
tree vectype_in;
|
||||
tree intermediate_type = NULL_TREE, narrow_type, double_vec_dest;
|
||||
bool double_op = false;
|
||||
tree first_vector, second_vector;
|
||||
tree vec_oprnd2 = NULL_TREE, vec_oprnd3 = NULL_TREE, last_oprnd = NULL_TREE;
|
||||
|
||||
if (!STMT_VINFO_RELEVANT_P (stmt_info))
|
||||
return false;
|
||||
|
@ -4297,7 +4304,8 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
if (!vectype_out)
|
||||
return false;
|
||||
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
|
||||
if (nunits_in != nunits_out / 2) /* FORNOW */
|
||||
if (nunits_in != nunits_out / 2
|
||||
&& nunits_in != nunits_out/4)
|
||||
return false;
|
||||
|
||||
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
|
||||
|
@ -4326,7 +4334,8 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
}
|
||||
|
||||
/* Supportable by target? */
|
||||
if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
|
||||
if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1,
|
||||
&double_op, &intermediate_type))
|
||||
return false;
|
||||
|
||||
STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
|
||||
|
@ -4346,8 +4355,15 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
ncopies);
|
||||
|
||||
/* Handle def. */
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
|
||||
|
||||
/* In case of double demotion, we first generate a demotion operation to the
|
||||
intermediate type, and then from that type to the final one. */
|
||||
if (double_op)
|
||||
narrow_type = intermediate_type;
|
||||
else
|
||||
narrow_type = vectype_out;
|
||||
vec_dest = vect_create_destination_var (scalar_dest, narrow_type);
|
||||
double_vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
|
||||
|
||||
/* In case the vectorization factor (VF) is bigger than the number
|
||||
of elements that we can fit in a vectype (nunits), we have to generate
|
||||
more than one vector stmt - i.e - we need to "unroll" the
|
||||
|
@ -4358,22 +4374,59 @@ vectorizable_type_demotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
/* Handle uses. */
|
||||
if (j == 0)
|
||||
{
|
||||
vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
|
||||
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
|
||||
vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
|
||||
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
|
||||
if (double_op)
|
||||
{
|
||||
/* For double demotion we need four operands. */
|
||||
vec_oprnd2 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
|
||||
vec_oprnd3 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd2);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
|
||||
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
|
||||
vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], last_oprnd);
|
||||
vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
|
||||
if (double_op)
|
||||
{
|
||||
/* For double demotion we need four operands. */
|
||||
vec_oprnd2 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
|
||||
vec_oprnd3 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd2);
|
||||
}
|
||||
}
|
||||
|
||||
/* Arguments are ready. Create the new vector stmt. */
|
||||
/* Arguments are ready. Create the new vector stmts. */
|
||||
new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
|
||||
vec_oprnd1);
|
||||
new_temp = make_ssa_name (vec_dest, new_stmt);
|
||||
gimple_assign_set_lhs (new_stmt, new_temp);
|
||||
vec_oprnd1);
|
||||
first_vector = make_ssa_name (vec_dest, new_stmt);
|
||||
gimple_assign_set_lhs (new_stmt, first_vector);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
|
||||
/* In the next iteration we will get copy for this operand. */
|
||||
last_oprnd = vec_oprnd1;
|
||||
|
||||
if (double_op)
|
||||
{
|
||||
/* For double demotion operation we first generate two demotion
|
||||
operations from the source type to the intermediate type, and
|
||||
then combine the results in one demotion to the destination
|
||||
type. */
|
||||
new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd2,
|
||||
vec_oprnd3);
|
||||
second_vector = make_ssa_name (vec_dest, new_stmt);
|
||||
gimple_assign_set_lhs (new_stmt, second_vector);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
|
||||
new_stmt = gimple_build_assign_with_ops (code1, double_vec_dest,
|
||||
first_vector, second_vector);
|
||||
new_temp = make_ssa_name (double_vec_dest, new_stmt);
|
||||
gimple_assign_set_lhs (new_stmt, new_temp);
|
||||
vect_finish_stmt_generation (stmt, new_stmt, gsi);
|
||||
|
||||
/* In the next iteration we will get copy for this operand. */
|
||||
last_oprnd = vec_oprnd3;
|
||||
}
|
||||
|
||||
if (j == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
|
||||
else
|
||||
|
@ -4420,6 +4473,9 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
int ncopies;
|
||||
int j;
|
||||
tree vectype_in;
|
||||
tree intermediate_type = NULL_TREE, first_vector, second_vector;
|
||||
bool double_op;
|
||||
tree wide_type, double_vec_dest;
|
||||
|
||||
if (!STMT_VINFO_RELEVANT_P (stmt_info))
|
||||
return false;
|
||||
|
@ -4450,7 +4506,7 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
if (!vectype_out)
|
||||
return false;
|
||||
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
|
||||
if (nunits_out != nunits_in / 2) /* FORNOW */
|
||||
if (nunits_out != nunits_in / 2 && nunits_out != nunits_in/4)
|
||||
return false;
|
||||
|
||||
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
|
||||
|
@ -4492,9 +4548,14 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
|
||||
/* Supportable by target? */
|
||||
if (!supportable_widening_operation (code, stmt, vectype_in,
|
||||
&decl1, &decl2, &code1, &code2))
|
||||
&decl1, &decl2, &code1, &code2,
|
||||
&double_op, &intermediate_type))
|
||||
return false;
|
||||
|
||||
/* Binary widening operation can only be supported directly by the
|
||||
architecture. */
|
||||
gcc_assert (!(double_op && op_type == binary_op));
|
||||
|
||||
STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
|
||||
|
||||
if (!vec_stmt) /* transformation not required. */
|
||||
|
@ -4513,7 +4574,13 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
ncopies);
|
||||
|
||||
/* Handle def. */
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
|
||||
if (double_op)
|
||||
wide_type = intermediate_type;
|
||||
else
|
||||
wide_type = vectype_out;
|
||||
|
||||
vec_dest = vect_create_destination_var (scalar_dest, wide_type);
|
||||
double_vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
|
||||
|
||||
/* In case the vectorization factor (VF) is bigger than the number
|
||||
of elements that we can fit in a vectype (nunits), we have to generate
|
||||
|
@ -4540,22 +4607,75 @@ vectorizable_type_promotion (gimple stmt, gimple_stmt_iterator *gsi,
|
|||
/* Arguments are ready. Create the new vector stmt. We are creating
|
||||
two vector defs because the widened result does not fit in one vector.
|
||||
The vectorized stmt can be expressed as a call to a target builtin,
|
||||
or a using a tree-code. */
|
||||
or using a tree-code. In case of double promotion (from char to int,
|
||||
for example), the promotion is performed in two phases: first we
|
||||
generate a promotion operation from the source type to the intermediate
|
||||
type (short in case of char->int promotion), and then for each of the
|
||||
created vectors we generate a promotion statement from the intermediate
|
||||
type to the destination type. */
|
||||
/* Generate first half of the widened result: */
|
||||
new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
|
||||
new_stmt = vect_gen_widened_results_half (code1, wide_type, decl1,
|
||||
vec_oprnd0, vec_oprnd1, op_type, vec_dest, gsi, stmt);
|
||||
if (j == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
|
||||
if (is_gimple_call (new_stmt))
|
||||
first_vector = gimple_call_lhs (new_stmt);
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
first_vector = gimple_assign_lhs (new_stmt);
|
||||
|
||||
if (!double_op)
|
||||
{
|
||||
if (j == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
}
|
||||
|
||||
/* Generate second half of the widened result: */
|
||||
new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
|
||||
new_stmt = vect_gen_widened_results_half (code2, wide_type, decl2,
|
||||
vec_oprnd0, vec_oprnd1, op_type, vec_dest, gsi, stmt);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
if (is_gimple_call (new_stmt))
|
||||
second_vector = gimple_call_lhs (new_stmt);
|
||||
else
|
||||
second_vector = gimple_assign_lhs (new_stmt);
|
||||
|
||||
if (!double_op)
|
||||
{
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* FIRST_VECTOR and SECOND_VECTOR are the results of source type
|
||||
to intermediate type promotion. Now we generate promotions
|
||||
for both of them to the destination type (i.e., four
|
||||
statements). */
|
||||
new_stmt = vect_gen_widened_results_half (code1, vectype_out,
|
||||
decl1, first_vector, NULL_TREE, op_type,
|
||||
double_vec_dest, gsi, stmt);
|
||||
if (j == 0)
|
||||
STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
|
||||
else
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
|
||||
new_stmt = vect_gen_widened_results_half (code2, vectype_out,
|
||||
decl2, first_vector, NULL_TREE, op_type,
|
||||
double_vec_dest, gsi, stmt);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
|
||||
new_stmt = vect_gen_widened_results_half (code1, vectype_out,
|
||||
decl1, second_vector, NULL_TREE, op_type,
|
||||
double_vec_dest, gsi, stmt);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
|
||||
new_stmt = vect_gen_widened_results_half (code2, vectype_out,
|
||||
decl2, second_vector, NULL_TREE, op_type,
|
||||
double_vec_dest, gsi, stmt);
|
||||
STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
|
||||
prev_stmt_info = vinfo_for_stmt (new_stmt);
|
||||
}
|
||||
}
|
||||
|
||||
*vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
|
||||
|
|
|
@ -147,6 +147,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "hashtab.h"
|
||||
#include "tree-vectorizer.h"
|
||||
#include "tree-pass.h"
|
||||
#include "langhooks.h"
|
||||
|
||||
/*************************************************************************
|
||||
General Vectorization Utilities
|
||||
|
@ -2136,12 +2137,17 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
|
|||
vectorizing the operation, if available.
|
||||
- DECL1 and DECL2 are decls of target builtin functions to be used
|
||||
when vectorizing the operation, if available. In this case,
|
||||
CODE1 and CODE2 are CALL_EXPR. */
|
||||
CODE1 and CODE2 are CALL_EXPR.
|
||||
- DOUBLE_OP determines if the operation is a double cast, like
|
||||
char->short->int
|
||||
- INTERM_TYPE is the intermediate type required to perform the
|
||||
widening operation (short in the above example) */
|
||||
|
||||
bool
|
||||
supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
|
||||
tree *decl1, tree *decl2,
|
||||
enum tree_code *code1, enum tree_code *code2)
|
||||
enum tree_code *code1, enum tree_code *code2,
|
||||
bool *double_op, tree *interm_type)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
|
@ -2154,6 +2160,8 @@ supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
|
|||
tree wide_vectype = get_vectype_for_scalar_type (type);
|
||||
enum tree_code c1, c2;
|
||||
|
||||
*double_op = false;
|
||||
|
||||
/* The result of a vectorized widening operation usually requires two vectors
|
||||
(because the widened results do not fit in one vector). The generated
|
||||
vector results would normally be expected to be generated in the same
|
||||
|
@ -2264,12 +2272,57 @@ supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
|
|||
|
||||
vec_mode = TYPE_MODE (vectype);
|
||||
if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
|
||||
|| (icode2 = optab_handler (optab2, vec_mode)->insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
|
||||
== CODE_FOR_nothing)
|
||||
return false;
|
||||
|
||||
/* Check if it's a double cast, like char->int. In such case the intermediate
|
||||
type is short, and we check that char->short->int operation is supported by
|
||||
the target. */
|
||||
if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
|
||||
|| insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
|
||||
{
|
||||
if (code == NOP_EXPR)
|
||||
{
|
||||
enum machine_mode intermediate_mode =
|
||||
insn_data[icode1].operand[0].mode;
|
||||
tree intermediate_type =
|
||||
lang_hooks.types.type_for_mode (intermediate_mode,
|
||||
TYPE_UNSIGNED (vectype));
|
||||
optab optab3 = optab_for_tree_code (c1, intermediate_type,
|
||||
optab_default);
|
||||
optab optab4 = optab_for_tree_code (c2, intermediate_type,
|
||||
optab_default);
|
||||
|
||||
if (!optab3 || !optab4)
|
||||
return false;
|
||||
|
||||
if ((icode1 = optab1->handlers[(int) vec_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != intermediate_mode
|
||||
|| (icode2 = optab2->handlers[(int) vec_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode2].operand[0].mode != intermediate_mode
|
||||
|| (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
|
||||
|| (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
|
||||
return false;
|
||||
else
|
||||
{
|
||||
*double_op = true;
|
||||
*interm_type = intermediate_type;
|
||||
*code1 = c1;
|
||||
*code2 = c2;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
*code1 = c1;
|
||||
*code2 = c2;
|
||||
return true;
|
||||
|
@ -2288,16 +2341,21 @@ supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
|
|||
|
||||
Output:
|
||||
- CODE1 is the code of a vector operation to be used when
|
||||
vectorizing the operation, if available. */
|
||||
vectorizing the operation, if available.
|
||||
- DOUBLE_OP determines if the operation is a double cast, like
|
||||
int->short->char
|
||||
- INTERMEDIATE_TYPE is the intermediate type required to perform the
|
||||
narrowing operation (short in the above example) */
|
||||
|
||||
bool
|
||||
supportable_narrowing_operation (enum tree_code code,
|
||||
const_gimple stmt, const_tree vectype,
|
||||
enum tree_code *code1)
|
||||
enum tree_code *code1, bool *double_op,
|
||||
tree *intermediate_type)
|
||||
{
|
||||
enum machine_mode vec_mode;
|
||||
enum insn_code icode1;
|
||||
optab optab1;
|
||||
optab optab1, interm_optab;
|
||||
tree type = gimple_expr_type (stmt);
|
||||
tree narrow_vectype = get_vectype_for_scalar_type (type);
|
||||
enum tree_code c1;
|
||||
|
@ -2331,10 +2389,30 @@ supportable_narrowing_operation (enum tree_code code,
|
|||
return false;
|
||||
|
||||
vec_mode = TYPE_MODE (vectype);
|
||||
if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
|
||||
if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
|
||||
== CODE_FOR_nothing)
|
||||
return false;
|
||||
|
||||
/* In case of NUNITS_IN == NUNITS_OUT/4 check that it is possible to
|
||||
perform the operation using an intermediate type of NUNITS_OUT/2. */
|
||||
if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
|
||||
{
|
||||
enum machine_mode intermediate_mode = insn_data[icode1].operand[0].mode;
|
||||
*intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
|
||||
TYPE_UNSIGNED (vectype));
|
||||
interm_optab = optab_for_tree_code (VEC_PACK_TRUNC_EXPR,
|
||||
*intermediate_type, optab_default);
|
||||
if (!interm_optab)
|
||||
return false;
|
||||
|
||||
if ((icode1 = interm_optab->handlers[(int) intermediate_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
|
||||
return false;
|
||||
|
||||
*double_op = true;
|
||||
}
|
||||
|
||||
*code1 = c1;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -671,9 +671,9 @@ extern enum dr_alignment_support vect_supportable_dr_alignment
|
|||
(struct data_reference *);
|
||||
extern bool reduction_code_for_scalar_code (enum tree_code, enum tree_code *);
|
||||
extern bool supportable_widening_operation (enum tree_code, gimple, tree,
|
||||
tree *, tree *, enum tree_code *, enum tree_code *);
|
||||
tree *, tree *, enum tree_code *, enum tree_code *, bool *, tree *);
|
||||
extern bool supportable_narrowing_operation (enum tree_code, const_gimple,
|
||||
const_tree, enum tree_code *);
|
||||
const_tree, enum tree_code *, bool *, tree *);
|
||||
|
||||
/* Creation and deletion of loop and stmt info structs. */
|
||||
extern loop_vec_info new_loop_vec_info (struct loop *loop);
|
||||
|
|
Loading…
Add table
Reference in a new issue