tree-vectorizer.c (supportable_widening_operation): Support multi-step conversion...
* tree-vectorizer.c (supportable_widening_operation): Support multi-step conversion, return the number of steps in such conversion and the required intermediate types. (supportable_narrowing_operation): Likewise. * tree-vectorizer.h (vect_pow2): New function. (supportable_widening_operation): Change argument types. (supportable_narrowing_operation): Likewise. (vectorizable_type_promotion): Add an argument. (vectorizable_type_demotion): Likewise. * tree-vect-analyze.c (vect_analyze_operations): Call vectorizable_type_promotion and vectorizable_type_demotion with additional argument. (vect_get_and_check_slp_defs): Detect patterns. (vect_build_slp_tree): Add an argument, don't fail in case of multiple types. (vect_analyze_slp_instance): Don't fail in case of multiple types. Call vect_build_slp_tree with correct arguments. Calculate unrolling factor according to the smallest type in the loop. (vect_detect_hybrid_slp_stmts): Include statements from patterns. * tree-vect-patterns.c (vect_recog_widen_mult_pattern): Call supportable_widening_operation with correct arguments. * tree-vect-transform.c (vect_get_slp_defs): Allocate output vector operands lists according to the number of vector statements in left or right node, if exists. (vect_gen_widened_results_half): Remove unused argument. (vectorizable_conversion): Call supportable_widening_operation, supportable_narrowing_operation, and vect_gen_widened_results_half with correct arguments. (vectorizable_assignment): Change documentation, support multiple types in SLP. (vectorizable_operation): Likewise. (vect_get_loop_based_defs): New function. (vect_create_vectorized_demotion_stmts): Likewise. (vectorizable_type_demotion): Support loop-aware SLP and general multi-step conversion. Call vect_get_loop_based_defs and vect_create_vectorized_demotion_stmts for transformation. (vect_create_vectorized_promotion_stmts): New function. (vectorizable_type_promotion): Support loop-aware SLP and general multi-step conversion. 
Call vect_create_vectorized_promotion_stmts for transformation. (vectorizable_store): Change documentation, support multiple types in SLP. (vectorizable_load): Likewise. (vect_transform_stmt): Pass SLP_NODE to vectorizable_type_promotion and vectorizable_type_demotion. (vect_schedule_slp_instance): Move here the calculation of number of vectorized statements for each node from... (vect_schedule_slp): ... here. (vect_transform_loop): Call vect_schedule_slp without the last argument. From-SVN: r139225
This commit is contained in:
parent
45ea82c11f
commit
5d59337297
27 changed files with 1660 additions and 350 deletions
|
@ -1,3 +1,56 @@
|
|||
2008-08-19 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* tree-vectorizer.c (supportable_widening_operation): Support
|
||||
multi-step conversion, return the number of steps in such conversion
|
||||
and the required intermediate types.
|
||||
(supportable_narrowing_operation): Likewise.
|
||||
* tree-vectorizer.h (vect_pow2): New function.
|
||||
(supportable_widening_operation): Change argument types.
|
||||
(supportable_narrowing_operation): Likewise.
|
||||
(vectorizable_type_promotion): Add an argument.
|
||||
(vectorizable_type_demotion): Likewise.
|
||||
* tree-vect-analyze.c (vect_analyze_operations): Call
|
||||
vectorizable_type_promotion and vectorizable_type_demotion with
|
||||
additional argument.
|
||||
(vect_get_and_check_slp_defs): Detect patterns.
|
||||
(vect_build_slp_tree): Add an argument, don't fail in case of multiple
|
||||
types.
|
||||
(vect_analyze_slp_instance): Don't fail in case of multiple types. Call
|
||||
vect_build_slp_tree with correct arguments. Calculate unrolling factor
|
||||
according to the smallest type in the loop.
|
||||
(vect_detect_hybrid_slp_stmts): Include statements from patterns.
|
||||
* tree-vect-patterns.c (vect_recog_widen_mult_pattern): Call
|
||||
supportable_widening_operation with correct arguments.
|
||||
* tree-vect-transform.c (vect_get_slp_defs): Allocate output vector
|
||||
operands lists according to the number of vector statements in left
|
||||
or right node, if exists.
|
||||
(vect_gen_widened_results_half): Remove unused argument.
|
||||
(vectorizable_conversion): Call supportable_widening_operation,
|
||||
supportable_narrowing_operation, and vect_gen_widened_results_half
|
||||
with correct arguments.
|
||||
(vectorizable_assignment): Change documentation, support multiple
|
||||
types in SLP.
|
||||
(vectorizable_operation): Likewise.
|
||||
(vect_get_loop_based_defs): New function.
|
||||
(vect_create_vectorized_demotion_stmts): Likewise.
|
||||
(vectorizable_type_demotion): Support loop-aware SLP and general
|
||||
multi-step conversion. Call vect_get_loop_based_defs and
|
||||
vect_create_vectorized_demotion_stmts for transformation.
|
||||
(vect_create_vectorized_promotion_stmts): New function.
|
||||
(vectorizable_type_promotion): Support loop-aware SLP and general
|
||||
multi-step conversion. Call vect_create_vectorized_promotion_stmts
|
||||
for transformation.
|
||||
(vectorizable_store): Change documentation, support multiple
|
||||
types in SLP.
|
||||
(vectorizable_load): Likewise.
|
||||
(vect_transform_stmt): Pass SLP_NODE to
|
||||
vectorizable_type_promotion and vectorizable_type_demotion.
|
||||
(vect_schedule_slp_instance): Move here the calculation of number
|
||||
of vectorized statements for each node from...
|
||||
(vect_schedule_slp): ... here.
|
||||
(vect_transform_loop): Call vect_schedule_slp without the last
|
||||
argument.
|
||||
|
||||
2008-08-19 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
PR bootstrap/37152
|
||||
|
|
|
@ -1,3 +1,18 @@
|
|||
2008-08-19 Ira Rosen <irar@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/slp-multitypes-1.c: New testcase.
|
||||
* gcc.dg/vect/slp-multitypes-2.c, gcc.dg/vect/slp-multitypes-3.c,
|
||||
gcc.dg/vect/slp-multitypes-4.c, gcc.dg/vect/slp-multitypes-5.c,
|
||||
gcc.dg/vect/slp-multitypes-6.c, gcc.dg/vect/slp-multitypes-7.c,
|
||||
gcc.dg/vect/slp-multitypes-8.c, gcc.dg/vect/slp-multitypes-9.c,
|
||||
gcc.dg/vect/slp-multitypes-10.c, gcc.dg/vect/slp-multitypes-11.c,
|
||||
gcc.dg/vect/slp-multitypes-12.c, gcc.dg/vect/slp-widen-mult-u8.c,
|
||||
gcc.dg/vect/slp-widen-mult-s16.c, gcc.dg/vect/vect-multitypes-16.c,
|
||||
gcc.dg/vect/vect-multitypes-17.c: Likewise.
|
||||
* gcc.dg/vect/slp-9.c: Now vectorizable using SLP.
|
||||
* gcc.dg/vect/slp-14.c, gcc.dg/vect/slp-5.c: Likewise.
|
||||
* lib/target-supports.exp (check_effective_target_vect_long_long): New.
|
||||
|
||||
2008-08-18 Adam Nemet <anemet@caviumnetworks.com>
|
||||
|
||||
* gcc.target/mips/ext-1.c: Add -mgp64 to dg-mips-options.
|
||||
|
|
|
@ -15,7 +15,7 @@ main1 (int n)
|
|||
unsigned short in2[N*16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
unsigned short out2[N*16];
|
||||
|
||||
/* Multiple types are not SLPable yet. */
|
||||
/* Multiple types are now SLPable. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
a0 = in[i*8] + 5;
|
||||
|
@ -110,9 +110,7 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_int_mult } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" {target { ! { vect_strided && vect_int_mult } } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_int_mult } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { target vect_int_mult } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ main1 ()
|
|||
unsigned short ia[N];
|
||||
unsigned int ib[N*2];
|
||||
|
||||
/* Not SLPable for now: multiple types with SLP of the smaller type. */
|
||||
/* Multiple types with SLP of the smaller type. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
out[i*8] = in[i*8];
|
||||
|
@ -121,8 +121,7 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { vect_strided_wide } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" { target { ! { vect_strided_wide } } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
|
|
@ -41,7 +41,7 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_strided && vect_widen_mult_hi_to_si } } } }*/
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } }*/
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
|
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
Normal file
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-1.c
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
unsigned short sout[N*8];
|
||||
unsigned int iout[N*8];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
sout[i*4] = 8;
|
||||
sout[i*4 + 1] = 18;
|
||||
sout[i*4 + 2] = 28;
|
||||
sout[i*4 + 3] = 38;
|
||||
|
||||
iout[i*4] = 8;
|
||||
iout[i*4 + 1] = 18;
|
||||
iout[i*4 + 2] = 28;
|
||||
iout[i*4 + 3] = 38;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (sout[i*4] != 8
|
||||
|| sout[i*4 + 1] != 18
|
||||
|| sout[i*4 + 2] != 28
|
||||
|| sout[i*4 + 3] != 38
|
||||
|| iout[i*4] != 8
|
||||
|| iout[i*4 + 1] != 18
|
||||
|| iout[i*4 + 2] != 28
|
||||
|| iout[i*4 + 3] != 38)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
52
gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
Normal file
52
gcc/testsuite/gcc.dg/vect/slp-multitypes-10.c
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
struct s
|
||||
{
|
||||
unsigned char a;
|
||||
unsigned char b;
|
||||
};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
struct s out[N*4];
|
||||
|
||||
for (i = 0; i < N*4; i++)
|
||||
{
|
||||
out[i].a = (unsigned char) in[i*2] + 1;
|
||||
out[i].b = (unsigned char) in[i*2 + 1] + 2;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N*4; i++)
|
||||
{
|
||||
if (out[i].a != (unsigned char) in[i*2] + 1
|
||||
|| out[i].b != (unsigned char) in[i*2 + 1] + 2)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
55
gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c
Normal file
55
gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c
Normal file
|
@ -0,0 +1,55 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 18
|
||||
|
||||
struct s
|
||||
{
|
||||
int a;
|
||||
int b;
|
||||
int c;
|
||||
};
|
||||
|
||||
char in[N*3] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
struct s out[N];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
out[i].a = (int) in[i*3] + 1;
|
||||
out[i].b = (int) in[i*3 + 1] + 2;
|
||||
out[i].c = (int) in[i*3 + 2] + 3;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (out[i].a != (int) in[i*3] + 1
|
||||
|| out[i].b != (int) in[i*3 + 1] + 2
|
||||
|| out[i].c != (int) in[i*3 + 2] + 3)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
68
gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
Normal file
68
gcc/testsuite/gcc.dg/vect/slp-multitypes-12.c
Normal file
|
@ -0,0 +1,68 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
unsigned short sout[N*8];
|
||||
unsigned int iout[N*8];
|
||||
unsigned char cout[N*8];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
sout[i*4] = 8;
|
||||
sout[i*4 + 1] = 18;
|
||||
sout[i*4 + 2] = 28;
|
||||
sout[i*4 + 3] = 38;
|
||||
|
||||
iout[i*4] = 8;
|
||||
iout[i*4 + 1] = 18;
|
||||
iout[i*4 + 2] = 28;
|
||||
iout[i*4 + 3] = 38;
|
||||
|
||||
cout[i*4] = 1;
|
||||
cout[i*4 + 1] = 2;
|
||||
cout[i*4 + 2] = 3;
|
||||
cout[i*4 + 3] = 4;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (sout[i*4] != 8
|
||||
|| sout[i*4 + 1] != 18
|
||||
|| sout[i*4 + 2] != 28
|
||||
|| sout[i*4 + 3] != 38
|
||||
|| iout[i*4] != 8
|
||||
|| iout[i*4 + 1] != 18
|
||||
|| iout[i*4 + 2] != 28
|
||||
|| iout[i*4 + 3] != 38
|
||||
|| cout[i*4] != 1
|
||||
|| cout[i*4 + 1] != 2
|
||||
|| cout[i*4 + 2] != 3
|
||||
|| cout[i*4 + 3] != 4)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
83
gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
Normal file
83
gcc/testsuite/gcc.dg/vect/slp-multitypes-2.c
Normal file
|
@ -0,0 +1,83 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 128
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 (unsigned short a0, unsigned short a1, unsigned short a2,
|
||||
unsigned short a3, unsigned short a4, unsigned short a5,
|
||||
unsigned short a6, unsigned short a7, unsigned short a8,
|
||||
unsigned short a9, unsigned short a10, unsigned short a11,
|
||||
unsigned short a12, unsigned short a13, unsigned short a14,
|
||||
unsigned short a15, unsigned char b0, unsigned char b1)
|
||||
{
|
||||
int i;
|
||||
unsigned short out[N*16];
|
||||
unsigned char out2[N*16];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
out[i*16] = a8;
|
||||
out[i*16 + 1] = a7;
|
||||
out[i*16 + 2] = a1;
|
||||
out[i*16 + 3] = a2;
|
||||
out[i*16 + 4] = a8;
|
||||
out[i*16 + 5] = a5;
|
||||
out[i*16 + 6] = a5;
|
||||
out[i*16 + 7] = a4;
|
||||
out[i*16 + 8] = a12;
|
||||
out[i*16 + 9] = a13;
|
||||
out[i*16 + 10] = a14;
|
||||
out[i*16 + 11] = a15;
|
||||
out[i*16 + 12] = a6;
|
||||
out[i*16 + 13] = a9;
|
||||
out[i*16 + 14] = a0;
|
||||
out[i*16 + 15] = a7;
|
||||
|
||||
out2[i*2] = b1;
|
||||
out2[i*2+1] = b0;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (out[i*16] != a8
|
||||
|| out[i*16 + 1] != a7
|
||||
|| out[i*16 + 2] != a1
|
||||
|| out[i*16 + 3] != a2
|
||||
|| out[i*16 + 4] != a8
|
||||
|| out[i*16 + 5] != a5
|
||||
|| out[i*16 + 6] != a5
|
||||
|| out[i*16 + 7] != a4
|
||||
|| out[i*16 + 8] != a12
|
||||
|| out[i*16 + 9] != a13
|
||||
|| out[i*16 + 10] != a14
|
||||
|| out[i*16 + 11] != a15
|
||||
|| out[i*16 + 12] != a6
|
||||
|| out[i*16 + 13] != a9
|
||||
|| out[i*16 + 14] != a0
|
||||
|| out[i*16 + 15] != a7
|
||||
|| out2[i*2] != b1
|
||||
|| out2[i*2 + 1] != b0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,20,21);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
94
gcc/testsuite/gcc.dg/vect/slp-multitypes-3.c
Normal file
94
gcc/testsuite/gcc.dg/vect/slp-multitypes-3.c
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
unsigned char in2[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
unsigned int out[N*8];
|
||||
unsigned char out2[N*8];
|
||||
|
||||
for (i = 0; i < N/2; i++)
|
||||
{
|
||||
out[i*8] = in[i*8] + 5;
|
||||
out[i*8 + 1] = in[i*8 + 1] + 6;
|
||||
out[i*8 + 2] = in[i*8 + 2] + 7;
|
||||
out[i*8 + 3] = in[i*8 + 3] + 8;
|
||||
out[i*8 + 4] = in[i*8 + 4] + 9;
|
||||
out[i*8 + 5] = in[i*8 + 5] + 10;
|
||||
out[i*8 + 6] = in[i*8 + 6] + 11;
|
||||
out[i*8 + 7] = in[i*8 + 7] + 12;
|
||||
|
||||
out2[i*16] = in2[i*16] + 2;
|
||||
out2[i*16 + 1] = in2[i*16 + 1] + 3;
|
||||
out2[i*16 + 2] = in2[i*16 + 2] + 4;
|
||||
out2[i*16 + 3] = in2[i*16 + 3] + 3;
|
||||
out2[i*16 + 4] = in2[i*16 + 4] + 2;
|
||||
out2[i*16 + 5] = in2[i*16 + 5] + 3;
|
||||
out2[i*16 + 6] = in2[i*16 + 6] + 2;
|
||||
out2[i*16 + 7] = in2[i*16 + 7] + 4;
|
||||
out2[i*16 + 8] = in2[i*16 + 8] + 2;
|
||||
out2[i*16 + 9] = in2[i*16 + 9] + 5;
|
||||
out2[i*16 + 10] = in2[i*16 + 10] + 2;
|
||||
out2[i*16 + 11] = in2[i*16 + 11] + 3;
|
||||
out2[i*16 + 12] = in2[i*16 + 12] + 4;
|
||||
out2[i*16 + 13] = in2[i*16 + 13] + 4;
|
||||
out2[i*16 + 14] = in2[i*16 + 14] + 3;
|
||||
out2[i*16 + 15] = in2[i*16 + 15] + 2;
|
||||
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N/2; i++)
|
||||
{
|
||||
if (out[i*8] != in[i*8] + 5
|
||||
|| out[i*8 + 1] != in[i*8 + 1] + 6
|
||||
|| out[i*8 + 2] != in[i*8 + 2] + 7
|
||||
|| out[i*8 + 3] != in[i*8 + 3] + 8
|
||||
|| out[i*8 + 4] != in[i*8 + 4] + 9
|
||||
|| out[i*8 + 5] != in[i*8 + 5] + 10
|
||||
|| out[i*8 + 6] != in[i*8 + 6] + 11
|
||||
|| out[i*8 + 7] != in[i*8 + 7] + 12
|
||||
|| out2[i*16] != in2[i*16] + 2
|
||||
|| out2[i*16 + 1] != in2[i*16 + 1] + 3
|
||||
|| out2[i*16 + 2] != in2[i*16 + 2] + 4
|
||||
|| out2[i*16 + 3] != in2[i*16 + 3] + 3
|
||||
|| out2[i*16 + 4] != in2[i*16 + 4] + 2
|
||||
|| out2[i*16 + 5] != in2[i*16 + 5] + 3
|
||||
|| out2[i*16 + 6] != in2[i*16 + 6] + 2
|
||||
|| out2[i*16 + 7] != in2[i*16 + 7] + 4
|
||||
|| out2[i*16 + 8] != in2[i*16 + 8] + 2
|
||||
|| out2[i*16 + 9] != in2[i*16 + 9] + 5
|
||||
|| out2[i*16 + 10] != in2[i*16 + 10] + 2
|
||||
|| out2[i*16 + 11] != in2[i*16 + 11] + 3
|
||||
|| out2[i*16 + 12] != in2[i*16 + 12] + 4
|
||||
|| out2[i*16 + 13] != in2[i*16 + 13] + 4
|
||||
|| out2[i*16 + 14] != in2[i*16 + 14] + 3
|
||||
|| out2[i*16 + 15] != in2[i*16 + 15] + 2)
|
||||
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
Normal file
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-4.c
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
int out[N*8];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
out[i*8] = (int) in[i*8] + 1;
|
||||
out[i*8 + 1] = (int) in[i*8 + 1] + 2;
|
||||
out[i*8 + 2] = (int) in[i*8 + 2] + 3;
|
||||
out[i*8 + 3] = (int) in[i*8 + 3] + 4;
|
||||
out[i*8 + 4] = (int) in[i*8 + 4] + 5;
|
||||
out[i*8 + 5] = (int) in[i*8 + 5] + 6;
|
||||
out[i*8 + 6] = (int) in[i*8 + 6] + 7;
|
||||
out[i*8 + 7] = (int) in[i*8 + 7] + 8;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (out[i*8] != (int) in[i*8] + 1
|
||||
|| out[i*8 + 1] != (int) in[i*8 + 1] + 2
|
||||
|| out[i*8 + 2] != (int) in[i*8 + 2] + 3
|
||||
|| out[i*8 + 3] != (int) in[i*8 + 3] + 4
|
||||
|| out[i*8 + 4] != (int) in[i*8 + 4] + 5
|
||||
|| out[i*8 + 5] != (int) in[i*8 + 5] + 6
|
||||
|| out[i*8 + 6] != (int) in[i*8 + 6] + 7
|
||||
|| out[i*8 + 7] != (int) in[i*8 + 7] + 8)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
Normal file
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-5.c
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
short in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
int out[N*8];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
out[i*8] = (short) in[i*8] + 1;
|
||||
out[i*8 + 1] = (short) in[i*8 + 1] + 2;
|
||||
out[i*8 + 2] = (short) in[i*8 + 2] + 3;
|
||||
out[i*8 + 3] = (short) in[i*8 + 3] + 4;
|
||||
out[i*8 + 4] = (short) in[i*8 + 4] + 5;
|
||||
out[i*8 + 5] = (short) in[i*8 + 5] + 6;
|
||||
out[i*8 + 6] = (short) in[i*8 + 6] + 7;
|
||||
out[i*8 + 7] = (short) in[i*8 + 7] + 8;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (out[i*8] != (short) in[i*8] + 1
|
||||
|| out[i*8 + 1] != (short) in[i*8 + 1] + 2
|
||||
|| out[i*8 + 2] != (short) in[i*8 + 2] + 3
|
||||
|| out[i*8 + 3] != (short) in[i*8 + 3] + 4
|
||||
|| out[i*8 + 4] != (short) in[i*8 + 4] + 5
|
||||
|| out[i*8 + 5] != (short) in[i*8 + 5] + 6
|
||||
|| out[i*8 + 6] != (short) in[i*8 + 6] + 7
|
||||
|| out[i*8 + 7] != (short) in[i*8 + 7] + 8)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-6.c
Normal file
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-6.c
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
unsigned char out[N*8];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
out[i*8] = (unsigned char) in[i*8] + 1;
|
||||
out[i*8 + 1] = (unsigned char) in[i*8 + 1] + 2;
|
||||
out[i*8 + 2] = (unsigned char) in[i*8 + 2] + 3;
|
||||
out[i*8 + 3] = (unsigned char) in[i*8 + 3] + 4;
|
||||
out[i*8 + 4] = (unsigned char) in[i*8 + 4] + 5;
|
||||
out[i*8 + 5] = (unsigned char) in[i*8 + 5] + 6;
|
||||
out[i*8 + 6] = (unsigned char) in[i*8 + 6] + 7;
|
||||
out[i*8 + 7] = (unsigned char) in[i*8 + 7] + 8;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (out[i*8] != (unsigned char) in[i*8] + 1
|
||||
|| out[i*8 + 1] != (unsigned char) in[i*8 + 1] + 2
|
||||
|| out[i*8 + 2] != (unsigned char) in[i*8 + 2] + 3
|
||||
|| out[i*8 + 3] != (unsigned char) in[i*8 + 3] + 4
|
||||
|| out[i*8 + 4] != (unsigned char) in[i*8 + 4] + 5
|
||||
|| out[i*8 + 5] != (unsigned char) in[i*8 + 5] + 6
|
||||
|| out[i*8 + 6] != (unsigned char) in[i*8 + 6] + 7
|
||||
|| out[i*8 + 7] != (unsigned char) in[i*8 + 7] + 8)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-7.c
Normal file
58
gcc/testsuite/gcc.dg/vect/slp-multitypes-7.c
Normal file
|
@ -0,0 +1,58 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
char in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
int out[N*8];
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
out[i*8] = (int) in[i*8] + 1;
|
||||
out[i*8 + 1] = (int) in[i*8 + 1] + 2;
|
||||
out[i*8 + 2] = (int) in[i*8 + 2] + 3;
|
||||
out[i*8 + 3] = (int) in[i*8 + 3] + 4;
|
||||
out[i*8 + 4] = (int) in[i*8 + 4] + 5;
|
||||
out[i*8 + 5] = (int) in[i*8 + 5] + 6;
|
||||
out[i*8 + 6] = (int) in[i*8 + 6] + 7;
|
||||
out[i*8 + 7] = (int) in[i*8 + 7] + 8;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (out[i*8] != (int) in[i*8] + 1
|
||||
|| out[i*8 + 1] != (int) in[i*8 + 1] + 2
|
||||
|| out[i*8 + 2] != (int) in[i*8 + 2] + 3
|
||||
|| out[i*8 + 3] != (int) in[i*8 + 3] + 4
|
||||
|| out[i*8 + 4] != (int) in[i*8 + 4] + 5
|
||||
|| out[i*8 + 5] != (int) in[i*8 + 5] + 6
|
||||
|| out[i*8 + 6] != (int) in[i*8 + 6] + 7
|
||||
|| out[i*8 + 7] != (int) in[i*8 + 7] + 8)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
46
gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
Normal file
46
gcc/testsuite/gcc.dg/vect/slp-multitypes-8.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
char in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
int out[N*8];
|
||||
|
||||
for (i = 0; i < N*4; i++)
|
||||
{
|
||||
out[i*2] = (int) in[i*2] + 1;
|
||||
out[i*2 + 1] = (int) in[i*2 + 1] + 2;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N*4; i++)
|
||||
{
|
||||
if (out[i*2] != (int) in[i*2] + 1
|
||||
|| out[i*2 + 1] != (int) in[i*2 + 1] + 2)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
46
gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
Normal file
46
gcc/testsuite/gcc.dg/vect/slp-multitypes-9.c
Normal file
|
@ -0,0 +1,46 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 8
|
||||
|
||||
unsigned int in[N*8] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63};
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
main1 ()
|
||||
{
|
||||
int i;
|
||||
unsigned char out[N*8];
|
||||
|
||||
for (i = 0; i < N*4; i++)
|
||||
{
|
||||
out[i*2] = (unsigned char) in[i*2] + 1;
|
||||
out[i*2 + 1] = (unsigned char) in[i*2 + 1] + 2;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N*4; i++)
|
||||
{
|
||||
if (out[i*2] != (unsigned char) in[i*2] + 1
|
||||
|| out[i*2 + 1] != (unsigned char) in[i*2 + 1] + 2)
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
47
gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
Normal file
47
gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
short X[N] __attribute__ ((__aligned__(16)));
|
||||
short Y[N] __attribute__ ((__aligned__(16)));
|
||||
int result[N];
|
||||
|
||||
/* short->int widening-mult */
|
||||
__attribute__ ((noinline)) int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len/2; i++) {
|
||||
result[2*i] = X[2*i] * Y[2*i];
|
||||
result[2*i+1] = X[2*i+1] * Y[2*i+1];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != X[i] * Y[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
47
gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
Normal file
47
gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
Normal file
|
@ -0,0 +1,47 @@
|
|||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned char X[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char Y[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned short result[N];
|
||||
|
||||
/* char->short widening-mult */
|
||||
__attribute__ ((noinline)) int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len/2; i++) {
|
||||
result[2*i] = X[2*i] * Y[2*i];
|
||||
result[2*i+1] = X[2*i+1] * Y[2*i+1];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != X[i] * Y[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
44
gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c
Normal file
44
gcc/testsuite/gcc.dg/vect/vect-multitypes-16.c
Normal file
|
@ -0,0 +1,44 @@
|
|||
/* { dg-require-effective-target vect_long_long } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
char x[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
foo (int len, long long *z) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
z[i] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main (void)
|
||||
{
|
||||
char i;
|
||||
long long z[N+4];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
x[i] = i;
|
||||
}
|
||||
|
||||
foo (N,z+2);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (z[i+2] != x[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { ! vect_unpack } } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
52
gcc/testsuite/gcc.dg/vect/vect-multitypes-17.c
Normal file
52
gcc/testsuite/gcc.dg/vect/vect-multitypes-17.c
Normal file
|
@ -0,0 +1,52 @@
|
|||
/* { dg-require-effective-target vect_long_long } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned char uX[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char uresultX[N];
|
||||
unsigned long long uY[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char uresultY[N];
|
||||
|
||||
/* Unsigned type demotion (di->qi) */
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
uresultX[i] = uX[i];
|
||||
uresultY[i] = (unsigned char)uY[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
uX[i] = 16-i;
|
||||
uY[i] = 16-i;
|
||||
if (i%5 == 0)
|
||||
uX[i] = 16-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (uresultX[i] != uX[i])
|
||||
abort ();
|
||||
if (uresultY[i] != (unsigned char)uY[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_trunc } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
@ -1526,6 +1526,29 @@ proc check_effective_target_vect_double { } {
|
|||
return $et_vect_double_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target supports hardware vectors of long long, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_long_long { } {
|
||||
global et_vect_long_long_saved
|
||||
|
||||
if [info exists et_vect_long_long_saved] {
|
||||
verbose "check_effective_target_vect_long_long: using cached result" 2
|
||||
} else {
|
||||
set et_vect_long_long_saved 0
|
||||
if { [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget spu-*-*] } {
|
||||
set et_vect_long_long_saved 1
|
||||
}
|
||||
}
|
||||
|
||||
verbose "check_effective_target_vect_long_long: returning $et_vect_long_long_saved" 2
|
||||
return $et_vect_long_long_saved
|
||||
}
|
||||
|
||||
|
||||
# Return 1 if the target plus current options does not support a vector
|
||||
# max instruction on "int", 0 otherwise.
|
||||
#
|
||||
|
|
|
@ -462,8 +462,8 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
|
|||
ok = true;
|
||||
if (STMT_VINFO_RELEVANT_P (stmt_info)
|
||||
|| STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
|
||||
ok = (vectorizable_type_promotion (stmt, NULL, NULL)
|
||||
|| vectorizable_type_demotion (stmt, NULL, NULL)
|
||||
ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_type_demotion (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_conversion (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_operation (stmt, NULL, NULL, NULL)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL, NULL)
|
||||
|
@ -2497,7 +2497,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
|
|||
tree *first_stmt_def0_type,
|
||||
tree *first_stmt_def1_type,
|
||||
tree *first_stmt_const_oprnd,
|
||||
int ncopies_for_cost)
|
||||
int ncopies_for_cost,
|
||||
bool *pattern0, bool *pattern1)
|
||||
{
|
||||
tree oprnd;
|
||||
unsigned int i, number_of_oprnds;
|
||||
|
@ -2527,6 +2528,58 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
|
|||
return false;
|
||||
}
|
||||
|
||||
/* Check if DEF_STMT is a part of a pattern and get the def stmt from
|
||||
the pattern. Check that all the stmts of the node are in the
|
||||
pattern. */
|
||||
if (def_stmt && vinfo_for_stmt (def_stmt)
|
||||
&& STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
|
||||
{
|
||||
if (!*first_stmt_dt0)
|
||||
*pattern0 = true;
|
||||
else
|
||||
{
|
||||
if (i == 1 && !*first_stmt_dt1)
|
||||
*pattern1 = true;
|
||||
else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
{
|
||||
fprintf (vect_dump, "Build SLP failed: some of the stmts"
|
||||
" are in a pattern, and others are not ");
|
||||
print_generic_expr (vect_dump, oprnd, TDF_SLIM);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
|
||||
dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
|
||||
|
||||
if (*dt == vect_unknown_def_type)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Unsupported pattern.");
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (gimple_code (def_stmt))
|
||||
{
|
||||
case GIMPLE_PHI:
|
||||
def = gimple_phi_result (def_stmt);
|
||||
break;
|
||||
|
||||
case GIMPLE_ASSIGN:
|
||||
def = gimple_assign_lhs (def_stmt);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "unsupported defining stmt: ");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!*first_stmt_dt0)
|
||||
{
|
||||
/* op0 of the first stmt of the group - store its info. */
|
||||
|
@ -2624,15 +2677,13 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
|
|||
/* Recursively build an SLP tree starting from NODE.
|
||||
Fail (and return FALSE) if def-stmts are not isomorphic, require data
|
||||
permutation or are of unsupported types of operation. Otherwise, return
|
||||
TRUE.
|
||||
SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
|
||||
in the case of multiple types for now. */
|
||||
TRUE. */
|
||||
|
||||
static bool
|
||||
vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
|
||||
unsigned int group_size, bool *slp_impossible,
|
||||
unsigned int group_size,
|
||||
int *inside_cost, int *outside_cost,
|
||||
int ncopies_for_cost)
|
||||
int ncopies_for_cost, unsigned int *max_nunits)
|
||||
{
|
||||
VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
|
||||
VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
|
||||
|
@ -2653,6 +2704,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
|
|||
enum machine_mode vec_mode;
|
||||
tree first_stmt_const_oprnd = NULL_TREE;
|
||||
struct data_reference *first_dr;
|
||||
bool pattern0 = false, pattern1 = false;
|
||||
|
||||
/* For every stmt in NODE find its def stmt/s. */
|
||||
for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
|
||||
|
@ -2691,16 +2743,13 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
|
|||
gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
|
||||
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
|
||||
if (ncopies > 1)
|
||||
{
|
||||
/* FORNOW. */
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
fprintf (vect_dump, "SLP failed - multiple types ");
|
||||
|
||||
*slp_impossible = true;
|
||||
return false;
|
||||
}
|
||||
if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
|
||||
fprintf (vect_dump, "SLP with multiple types ");
|
||||
|
||||
/* In case of multiple types we need to detect the smallest type. */
|
||||
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
|
||||
*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
if (is_gimple_call (stmt))
|
||||
rhs_code = CALL_EXPR;
|
||||
else
|
||||
|
@ -2799,7 +2848,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
|
|||
&first_stmt_def0_type,
|
||||
&first_stmt_def1_type,
|
||||
&first_stmt_const_oprnd,
|
||||
ncopies_for_cost))
|
||||
ncopies_for_cost,
|
||||
&pattern0, &pattern1))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
|
@ -2807,6 +2857,11 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
|
|||
/* Load. */
|
||||
if (i == 0)
|
||||
{
|
||||
/* In case of multiple types we need to detect the smallest
|
||||
type. */
|
||||
if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
|
||||
*max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
|
||||
/* First stmt of the SLP group should be the first load of
|
||||
the interleaving loop if data permutation is not allowed.
|
||||
Check that there is no gap between the loads. */
|
||||
|
@ -2905,7 +2960,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
|
|||
&first_stmt_def0_type,
|
||||
&first_stmt_def1_type,
|
||||
&first_stmt_const_oprnd,
|
||||
ncopies_for_cost))
|
||||
ncopies_for_cost,
|
||||
&pattern0, &pattern1))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -2929,8 +2985,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
|
|||
SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
|
||||
SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
|
||||
if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
|
||||
slp_impossible, inside_cost, outside_cost,
|
||||
ncopies_for_cost))
|
||||
inside_cost, outside_cost,
|
||||
ncopies_for_cost, max_nunits))
|
||||
return false;
|
||||
|
||||
SLP_TREE_LEFT (*node) = left_node;
|
||||
|
@ -2946,8 +3002,8 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
|
|||
SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
|
||||
SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
|
||||
if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
|
||||
slp_impossible, inside_cost, outside_cost,
|
||||
ncopies_for_cost))
|
||||
inside_cost, outside_cost,
|
||||
ncopies_for_cost, max_nunits))
|
||||
return false;
|
||||
|
||||
SLP_TREE_RIGHT (*node) = right_node;
|
||||
|
@ -3003,7 +3059,7 @@ vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
|
|||
|
||||
|
||||
/* Analyze an SLP instance starting from a group of strided stores. Call
|
||||
vect_build_slp_tree to build a tree of packed stmts if possible.
|
||||
vect_build_slp_tree to build a tree of packed stmts if possible.
|
||||
Return FALSE if it's impossible to SLP any stmt in the loop. */
|
||||
|
||||
static bool
|
||||
|
@ -3018,8 +3074,8 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
|
|||
unsigned int vectorization_factor = 0, ncopies;
|
||||
bool slp_impossible = false;
|
||||
int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
|
||||
unsigned int max_nunits = 0;
|
||||
|
||||
/* FORNOW: multiple types are not supported. */
|
||||
scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
|
||||
vectype = get_vectype_for_scalar_type (scalar_type);
|
||||
if (!vectype)
|
||||
|
@ -3035,13 +3091,6 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
|
|||
nunits = TYPE_VECTOR_SUBPARTS (vectype);
|
||||
vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
ncopies = vectorization_factor / nunits;
|
||||
if (ncopies > 1)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_SLP))
|
||||
fprintf (vect_dump, "SLP failed - multiple types ");
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Create a node (a root of the SLP tree) for the packed strided stores. */
|
||||
SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
|
||||
|
@ -3069,13 +3118,18 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
|
|||
ncopies_for_cost = unrolling_factor * group_size / nunits;
|
||||
|
||||
/* Build the tree for the SLP instance. */
|
||||
if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
|
||||
&inside_cost, &outside_cost, ncopies_for_cost))
|
||||
if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
|
||||
&outside_cost, ncopies_for_cost, &max_nunits))
|
||||
{
|
||||
/* Create a new SLP instance. */
|
||||
new_instance = XNEW (struct _slp_instance);
|
||||
SLP_INSTANCE_TREE (new_instance) = node;
|
||||
SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
|
||||
/* Calculate the unrolling factor based on the smallest type. */
|
||||
if (max_nunits > nunits)
|
||||
unrolling_factor = least_common_multiple (max_nunits, group_size)
|
||||
/ group_size;
|
||||
|
||||
SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
|
||||
SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
|
||||
SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
|
||||
|
@ -3181,7 +3235,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node)
|
|||
&& TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
|
||||
FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
|
||||
if (vinfo_for_stmt (use_stmt)
|
||||
&& !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
|
||||
&& !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
|
||||
&& STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
|
||||
vect_mark_slp_stmts (node, hybrid, i);
|
||||
|
||||
vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
|
||||
|
|
|
@ -374,7 +374,8 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
|
|||
tree dummy;
|
||||
tree var;
|
||||
enum tree_code dummy_code;
|
||||
bool dummy_bool;
|
||||
int dummy_int;
|
||||
VEC (tree, heap) *dummy_vec;
|
||||
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
return NULL;
|
||||
|
@ -415,7 +416,7 @@ vect_recog_widen_mult_pattern (gimple last_stmt,
|
|||
if (!vectype
|
||||
|| !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, vectype,
|
||||
&dummy, &dummy, &dummy_code,
|
||||
&dummy_code, &dummy_bool, &dummy))
|
||||
&dummy_code, &dummy_int, &dummy_vec))
|
||||
return NULL;
|
||||
|
||||
*type_in = vectype;
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -2138,30 +2138,30 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
|
|||
- DECL1 and DECL2 are decls of target builtin functions to be used
|
||||
when vectorizing the operation, if available. In this case,
|
||||
CODE1 and CODE2 are CALL_EXPR.
|
||||
- DOUBLE_OP determines if the operation is a double cast, like
|
||||
char->short->int
|
||||
- INTERM_TYPE is the intermediate type required to perform the
|
||||
widening operation (short in the above example) */
|
||||
- MULTI_STEP_CVT determines the number of required intermediate steps in
|
||||
case of multi-step conversion (like char->short->int - in that case
|
||||
MULTI_STEP_CVT will be 1).
|
||||
- INTERM_TYPES contains the intermediate type required to perform the
|
||||
widening operation (short in the above example). */
|
||||
|
||||
bool
|
||||
supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
|
||||
tree *decl1, tree *decl2,
|
||||
enum tree_code *code1, enum tree_code *code2,
|
||||
bool *double_op, tree *interm_type)
|
||||
int *multi_step_cvt,
|
||||
VEC (tree, heap) **interm_types)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
|
||||
bool ordered_p;
|
||||
enum machine_mode vec_mode;
|
||||
enum insn_code icode1, icode2;
|
||||
enum insn_code icode1 = 0, icode2 = 0;
|
||||
optab optab1, optab2;
|
||||
tree type = gimple_expr_type (stmt);
|
||||
tree wide_vectype = get_vectype_for_scalar_type (type);
|
||||
enum tree_code c1, c2;
|
||||
|
||||
*double_op = false;
|
||||
|
||||
/* The result of a vectorized widening operation usually requires two vectors
|
||||
(because the widened results do not fit in one vector). The generated
|
||||
vector results would normally be expected to be generated in the same
|
||||
|
@ -2272,52 +2272,60 @@ supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
|
|||
|
||||
vec_mode = TYPE_MODE (vectype);
|
||||
if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
|
||||
|| (icode2 = optab_handler (optab2, vec_mode)->insn_code)
|
||||
== CODE_FOR_nothing)
|
||||
|| (icode2 = optab_handler (optab2, vec_mode)->insn_code)
|
||||
== CODE_FOR_nothing)
|
||||
return false;
|
||||
|
||||
/* Check if it's a double cast, like char->int. In such case the intermediate
|
||||
type is short, and we check that char->short->int operation is supported by
|
||||
the target. */
|
||||
/* Check if it's a multi-step conversion that can be done using intermediate
|
||||
types. */
|
||||
if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
|
||||
|| insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
|
||||
|| insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
|
||||
{
|
||||
if (code == NOP_EXPR)
|
||||
int i;
|
||||
tree prev_type = vectype, intermediate_type;
|
||||
enum machine_mode intermediate_mode, prev_mode = vec_mode;
|
||||
optab optab3, optab4;
|
||||
|
||||
if (!CONVERT_EXPR_CODE_P (code))
|
||||
return false;
|
||||
|
||||
*code1 = c1;
|
||||
*code2 = c2;
|
||||
|
||||
/* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
|
||||
intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
|
||||
to get to WIDE_VECTYPE, and fail if we do not. */
|
||||
*interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
enum machine_mode intermediate_mode =
|
||||
insn_data[icode1].operand[0].mode;
|
||||
tree intermediate_type =
|
||||
lang_hooks.types.type_for_mode (intermediate_mode,
|
||||
TYPE_UNSIGNED (vectype));
|
||||
optab optab3 = optab_for_tree_code (c1, intermediate_type,
|
||||
optab_default);
|
||||
optab optab4 = optab_for_tree_code (c2, intermediate_type,
|
||||
optab_default);
|
||||
intermediate_mode = insn_data[icode1].operand[0].mode;
|
||||
intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
|
||||
TYPE_UNSIGNED (prev_type));
|
||||
optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
|
||||
optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
|
||||
|
||||
if (!optab3 || !optab4)
|
||||
return false;
|
||||
|
||||
if ((icode1 = optab1->handlers[(int) vec_mode].insn_code)
|
||||
if (!optab3 || !optab4
|
||||
|| (icode1 = optab1->handlers[(int) prev_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != intermediate_mode
|
||||
|| (icode2 = optab2->handlers[(int) vec_mode].insn_code)
|
||||
|| (icode2 = optab2->handlers[(int) prev_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode2].operand[0].mode != intermediate_mode
|
||||
|| (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
|
||||
|| (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
|
||||
|| (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
|
||||
== CODE_FOR_nothing)
|
||||
return false;
|
||||
else
|
||||
{
|
||||
*double_op = true;
|
||||
*interm_type = intermediate_type;
|
||||
*code1 = c1;
|
||||
*code2 = c2;
|
||||
return true;
|
||||
}
|
||||
|
||||
VEC_quick_push (tree, *interm_types, intermediate_type);
|
||||
(*multi_step_cvt)++;
|
||||
|
||||
if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
|
||||
&& insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
|
||||
return true;
|
||||
|
||||
prev_type = intermediate_type;
|
||||
prev_mode = intermediate_mode;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -2342,16 +2350,17 @@ supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
|
|||
Output:
|
||||
- CODE1 is the code of a vector operation to be used when
|
||||
vectorizing the operation, if available.
|
||||
- DOUBLE_OP determines if the operation is a double cast, like
|
||||
int->short->char
|
||||
- INTERMEDIATE_TYPE is the intermediate type required to perform the
|
||||
widening operation (short in the above example) */
|
||||
- MULTI_STEP_CVT determines the number of required intermediate steps in
|
||||
case of multi-step conversion (like int->short->char - in that case
|
||||
MULTI_STEP_CVT will be 1).
|
||||
- INTERM_TYPES contains the intermediate type required to perform the
|
||||
narrowing operation (short in the above example). */
|
||||
|
||||
bool
|
||||
supportable_narrowing_operation (enum tree_code code,
|
||||
const_gimple stmt, const_tree vectype,
|
||||
enum tree_code *code1, bool *double_op,
|
||||
tree *intermediate_type)
|
||||
const_gimple stmt, tree vectype,
|
||||
enum tree_code *code1, int *multi_step_cvt,
|
||||
VEC (tree, heap) **interm_types)
|
||||
{
|
||||
enum machine_mode vec_mode;
|
||||
enum insn_code icode1;
|
||||
|
@ -2359,6 +2368,8 @@ supportable_narrowing_operation (enum tree_code code,
|
|||
tree type = gimple_expr_type (stmt);
|
||||
tree narrow_vectype = get_vectype_for_scalar_type (type);
|
||||
enum tree_code c1;
|
||||
tree intermediate_type, prev_type;
|
||||
int i;
|
||||
|
||||
switch (code)
|
||||
{
|
||||
|
@ -2393,24 +2404,45 @@ supportable_narrowing_operation (enum tree_code code,
|
|||
== CODE_FOR_nothing)
|
||||
return false;
|
||||
|
||||
/* In case of NUNITS_IN == NUNITS_OUT/4 check that it is possible to
|
||||
perform the operation using an intermediate type of NUNITS_OUT/2. */
|
||||
/* Check if it's a multi-step conversion that can be done using intermediate
|
||||
types. */
|
||||
if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
|
||||
{
|
||||
enum machine_mode intermediate_mode = insn_data[icode1].operand[0].mode;
|
||||
*intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
|
||||
TYPE_UNSIGNED (vectype));
|
||||
interm_optab = optab_for_tree_code (VEC_PACK_TRUNC_EXPR,
|
||||
*intermediate_type, optab_default);
|
||||
if (!interm_optab)
|
||||
return false;
|
||||
enum machine_mode intermediate_mode, prev_mode = vec_mode;
|
||||
|
||||
if ((icode1 = interm_optab->handlers[(int) intermediate_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
|
||||
return false;
|
||||
*code1 = c1;
|
||||
prev_type = vectype;
|
||||
/* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
|
||||
intermediate steps in demotion sequence. We try MAX_INTERM_CVT_STEPS
|
||||
to get to NARROW_VECTYPE, and fail if we do not. */
|
||||
*interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
intermediate_mode = insn_data[icode1].operand[0].mode;
|
||||
intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
|
||||
TYPE_UNSIGNED (prev_type));
|
||||
interm_optab = optab_for_tree_code (c1, intermediate_type,
|
||||
optab_default);
|
||||
if (!interm_optab
|
||||
|| (icode1 = optab1->handlers[(int) prev_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != intermediate_mode
|
||||
|| (icode1
|
||||
= interm_optab->handlers[(int) intermediate_mode].insn_code)
|
||||
== CODE_FOR_nothing)
|
||||
return false;
|
||||
|
||||
*double_op = true;
|
||||
VEC_quick_push (tree, *interm_types, intermediate_type);
|
||||
(*multi_step_cvt)++;
|
||||
|
||||
if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
|
||||
return true;
|
||||
|
||||
prev_type = intermediate_type;
|
||||
prev_mode = intermediate_mode;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
*code1 = c1;
|
||||
|
|
|
@ -522,6 +522,10 @@ typedef struct _stmt_vec_info {
|
|||
#define TARG_VEC_STORE_COST 1
|
||||
#endif
|
||||
|
||||
/* The maximum number of intermediate steps required in multi-step type
|
||||
conversion. */
|
||||
#define MAX_INTERM_CVT_STEPS 3
|
||||
|
||||
/* Avoid GTY(()) on stmt_vec_info. */
|
||||
typedef void *vec_void_p;
|
||||
DEF_VEC_P (vec_void_p);
|
||||
|
@ -602,6 +606,16 @@ stmt_vinfo_set_outside_of_loop_cost (stmt_vec_info stmt_info, slp_tree slp_node,
|
|||
STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = cost;
|
||||
}
|
||||
|
||||
/* Return 2 raised to the power X, computed by repeated doubling.
   A zero or negative X yields 1.  */

static inline int
vect_pow2 (int x)
{
  int power = 1;
  int remaining;

  /* Double once for every unit of the exponent.  */
  for (remaining = x; remaining > 0; remaining--)
    power += power;

  return power;
}
|
||||
|
||||
/*-----------------------------------------------------------------*/
|
||||
/* Info on data references alignment. */
|
||||
|
@ -671,9 +685,10 @@ extern enum dr_alignment_support vect_supportable_dr_alignment
|
|||
(struct data_reference *);
|
||||
extern bool reduction_code_for_scalar_code (enum tree_code, enum tree_code *);
|
||||
extern bool supportable_widening_operation (enum tree_code, gimple, tree,
|
||||
tree *, tree *, enum tree_code *, enum tree_code *, bool *, tree *);
|
||||
tree *, tree *, enum tree_code *, enum tree_code *,
|
||||
int *, VEC (tree, heap) **);
|
||||
extern bool supportable_narrowing_operation (enum tree_code, const_gimple,
|
||||
const_tree, enum tree_code *, bool *, tree *);
|
||||
tree, enum tree_code *, int *, VEC (tree, heap) **);
|
||||
|
||||
/* Creation and deletion of loop and stmt info structs. */
|
||||
extern loop_vec_info new_loop_vec_info (struct loop *loop);
|
||||
|
@ -705,9 +720,9 @@ extern bool vectorizable_store (gimple, gimple_stmt_iterator *, gimple *,
|
|||
extern bool vectorizable_operation (gimple, gimple_stmt_iterator *, gimple *,
|
||||
slp_tree);
|
||||
extern bool vectorizable_type_promotion (gimple, gimple_stmt_iterator *,
|
||||
gimple *);
|
||||
gimple *, slp_tree);
|
||||
extern bool vectorizable_type_demotion (gimple, gimple_stmt_iterator *,
|
||||
gimple *);
|
||||
gimple *, slp_tree);
|
||||
extern bool vectorizable_conversion (gimple, gimple_stmt_iterator *, gimple *,
|
||||
slp_tree);
|
||||
extern bool vectorizable_assignment (gimple, gimple_stmt_iterator *, gimple *,
|
||||
|
|
Loading…
Add table
Reference in a new issue