vect: Allow same precision for bit-precision conversions.
In PR111794 we miss a vectorization because, on RISC-V, type precision and mode precision differ for mask types. We can still vectorize if we allow assignments whose destination and source have the same precision, which is what this patch does.

gcc/ChangeLog:

	PR tree-optimization/111794
	* tree-vect-stmts.cc (vectorizable_assignment): Add same-precision exception for dest and source.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/slp-mask-1.c: New test.
	* gcc.target/riscv/rvv/autovec/slp-mask-run-1.c: New test.
This commit is contained in:
parent
82bbbb73c6
commit
32b74c9e1d
3 changed files with 56 additions and 5 deletions
18
gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c
Normal file
18
gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-1.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-additional-options "-std=gnu99 -O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-slp-details" } */
|
||||
|
||||
/* For each of the eight lanes i in 0..7, store 1 into RES[i] when X[i]
   equals 1 and Y[i] equals 2, and 0 otherwise.

   X and RES hold int elements while Y holds short elements, so the two
   comparisons in each statement operate on differently sized inputs.
   The eight stores are intentionally kept as straight-line statements
   instead of a loop; the dg-final directive accompanying this test scans
   the SLP dump, so rewriting this as a loop would change what is being
   tested.

   The noipa attribute is kept from the original test; see the GCC
   function-attribute documentation for its exact effect.  */
void
__attribute__ ((noipa))
f (int *restrict x, short *restrict y, int *restrict res)
{
  /* Each comparison is parenthesized explicitly: `==' already binds
     tighter than `&', so the value is unchanged, but the intent is
     unambiguous and -Wparentheses stays quiet.  */
  res[0] = (x[0] == 1) & (y[0] == 2);
  res[1] = (x[1] == 1) & (y[1] == 2);
  res[2] = (x[2] == 1) & (y[2] == 2);
  res[3] = (x[3] == 1) & (y[3] == 2);
  res[4] = (x[4] == 1) & (y[4] == 2);
  res[5] = (x[5] == 1) & (y[5] == 2);
  res[6] = (x[6] == 1) & (y[6] == 2);
  res[7] = (x[7] == 1) & (y[7] == 2);
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp2" } } */
|
31
gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c
Normal file
31
gcc/testsuite/gcc.target/riscv/rvv/autovec/slp-mask-run-1.c
Normal file
|
@ -0,0 +1,31 @@
|
|||
/* { dg-do run { target { riscv_v } } } */
|
||||
/* { dg-additional-options "-std=gnu99 -O3 -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable" } */
|
||||
|
||||
#include <malloc.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "slp-mask-1.c"
|
||||
|
||||
#define SZ 8
|
||||
|
||||
__attribute__ ((optimize ("1")))
|
||||
int main ()
|
||||
{
|
||||
int *a = malloc (SZ * sizeof (*a));
|
||||
short *b = malloc (SZ * sizeof (*b));
|
||||
int *res = malloc (SZ * sizeof (*res));
|
||||
int *ref = malloc (SZ * sizeof (*ref));
|
||||
|
||||
for (int i = 0; i < SZ; i++)
|
||||
{
|
||||
a[i] = i & 1;
|
||||
b[i] = 2;
|
||||
ref[i] = a[i] == 1 & b[i] == 2;
|
||||
}
|
||||
|
||||
f (a, b, res);
|
||||
|
||||
for (int i = 0; i < SZ; i++)
|
||||
if (res[i] != ref[i])
|
||||
__builtin_abort ();
|
||||
}
|
|
@ -6058,14 +6058,16 @@ vectorizable_assignment (vec_info *vinfo,
|
|||
/* But a conversion that does not change the bit-pattern is ok. */
|
||||
&& !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
|
||||
&& INTEGRAL_TYPE_P (TREE_TYPE (op))
|
||||
&& (TYPE_PRECISION (TREE_TYPE (scalar_dest))
|
||||
&& (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
|
||||
> TYPE_PRECISION (TREE_TYPE (op)))
|
||||
&& TYPE_UNSIGNED (TREE_TYPE (op))))
|
||||
&& TYPE_UNSIGNED (TREE_TYPE (op)))
|
||||
|| (TYPE_PRECISION (TREE_TYPE (scalar_dest))
|
||||
== TYPE_PRECISION (TREE_TYPE (op))))))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"type conversion to/from bit-precision "
|
||||
"unsupported.\n");
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"type conversion to/from bit-precision "
|
||||
"unsupported.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue