OMP SIMD inbranch call vectorization for AVX512 style masks
The following teaches vectorizable_simd_clone_call to handle integer mode masks. The tricky bit is to second-guess the number of lanes represented by a single mask argument - the following uses simdlen and the number of mask arguments to calculate that, assuming ABIs have them uniform. Similar to the VOIDmode handling there's a restriction on not supporting splitting/merging of incoming vector masks to more/less SIMD call arguments. PR tree-optimization/111795 * tree-vect-stmts.cc (vectorizable_simd_clone_call): Handle integer mode mask arguments. * gcc.target/i386/vect-simd-clone-avx512-1.c: New testcase. * gcc.target/i386/vect-simd-clone-avx512-2.c: Likewise. * gcc.target/i386/vect-simd-clone-avx512-3.c: Likewise.
This commit is contained in:
parent
63eaccd114
commit
3179ad72f6
4 changed files with 175 additions and 30 deletions
43
gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-1.c
Normal file
43
gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-1.c
Normal file
|
@ -0,0 +1,43 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512vl } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx512vl" } */
|
||||
|
||||
#include "avx512vl-check.h"
|
||||
|
||||
#ifndef SIMDLEN
|
||||
#define SIMDLEN 4
|
||||
#endif
|
||||
|
||||
int x[1024];
|
||||
|
||||
#pragma omp declare simd simdlen(SIMDLEN)
|
||||
__attribute__((noinline)) int
|
||||
foo (int a, int b)
|
||||
{
|
||||
return a + b;
|
||||
}
|
||||
|
||||
void __attribute__((noipa))
|
||||
bar (void)
|
||||
{
|
||||
#pragma omp simd
|
||||
for (int i = 0; i < 1024; i++)
|
||||
if (x[i] < 20)
|
||||
x[i] = foo (x[i], x[i]);
|
||||
}
|
||||
|
||||
void avx512vl_test ()
|
||||
{
|
||||
int i;
|
||||
#pragma GCC novector
|
||||
for (i = 0; i < 1024; i++)
|
||||
x[i] = i;
|
||||
|
||||
bar ();
|
||||
|
||||
#pragma GCC novector
|
||||
for (i = 0; i < 1024; i++)
|
||||
if ((i < 20 && x[i] != i + i)
|
||||
|| (i >= 20 && x[i] != i))
|
||||
abort ();
|
||||
}
|
6
gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-2.c
Normal file
6
gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-2.c
Normal file
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512vl } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx512vl" } */
|
||||
|
||||
#define SIMDLEN 8
|
||||
#include "vect-simd-clone-avx512-1.c"
|
6
gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-3.c
Normal file
6
gcc/testsuite/gcc.target/i386/vect-simd-clone-avx512-3.c
Normal file
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx512vl } */
|
||||
/* { dg-options "-O2 -fopenmp-simd -mavx512vl" } */
|
||||
|
||||
#define SIMDLEN 16
|
||||
#include "vect-simd-clone-avx512-1.c"
|
|
@ -4385,6 +4385,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|
|||
i = -1;
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_MASK:
|
||||
if (SCALAR_INT_MODE_P (n->simdclone->mask_mode)
|
||||
!= SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype)))
|
||||
i = -1;
|
||||
break;
|
||||
}
|
||||
if (i == (size_t) -1)
|
||||
|
@ -4410,6 +4413,12 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|
|||
if (bestn == NULL)
|
||||
return false;
|
||||
|
||||
unsigned int num_mask_args = 0;
|
||||
if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
|
||||
for (i = 0; i < nargs; i++)
|
||||
if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
|
||||
num_mask_args++;
|
||||
|
||||
for (i = 0; i < nargs; i++)
|
||||
{
|
||||
if ((arginfo[i].dt == vect_constant_def
|
||||
|
@ -4434,30 +4443,50 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|
|||
return false;
|
||||
}
|
||||
|
||||
if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
|
||||
&& bestn->simdclone->mask_mode == VOIDmode
|
||||
&& (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
|
||||
!= simd_clone_subparts (arginfo[i].vectype)))
|
||||
if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK)
|
||||
{
|
||||
/* FORNOW we only have partial support for vector-type masks that
|
||||
can't hold all of simdlen. */
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
||||
vect_location,
|
||||
"in-branch vector clones are not yet"
|
||||
" supported for mismatched vector sizes.\n");
|
||||
return false;
|
||||
}
|
||||
if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK
|
||||
&& bestn->simdclone->mask_mode != VOIDmode)
|
||||
{
|
||||
/* FORNOW don't support integer-type masks. */
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
||||
vect_location,
|
||||
"in-branch vector clones are not yet"
|
||||
" supported for integer mask modes.\n");
|
||||
return false;
|
||||
if (bestn->simdclone->mask_mode == VOIDmode)
|
||||
{
|
||||
if (simd_clone_subparts (bestn->simdclone->args[i].vector_type)
|
||||
!= simd_clone_subparts (arginfo[i].vectype))
|
||||
{
|
||||
/* FORNOW we only have partial support for vector-type masks
|
||||
that can't hold all of simdlen. */
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
||||
vect_location,
|
||||
"in-branch vector clones are not yet"
|
||||
" supported for mismatched vector sizes.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
|
||||
{
|
||||
if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo[i].vectype))
|
||||
|| maybe_ne (exact_div (bestn->simdclone->simdlen,
|
||||
num_mask_args),
|
||||
simd_clone_subparts (arginfo[i].vectype)))
|
||||
{
|
||||
/* FORNOW we only have partial support for integer-type masks
|
||||
that represent the same number of lanes as the
|
||||
vectorized mask inputs. */
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
||||
vect_location,
|
||||
"in-branch vector clones are not yet "
|
||||
"supported for mismatched vector sizes.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION,
|
||||
vect_location,
|
||||
"in-branch vector clones not supported"
|
||||
" on this target.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4674,14 +4703,9 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|
|||
}
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_MASK:
|
||||
atype = bestn->simdclone->args[i].vector_type;
|
||||
if (bestn->simdclone->mask_mode != VOIDmode)
|
||||
{
|
||||
/* FORNOW: this is disabled above. */
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else
|
||||
if (bestn->simdclone->mask_mode == VOIDmode)
|
||||
{
|
||||
atype = bestn->simdclone->args[i].vector_type;
|
||||
tree elt_type = TREE_TYPE (atype);
|
||||
tree one = fold_convert (elt_type, integer_one_node);
|
||||
tree zero = fold_convert (elt_type, integer_zero_node);
|
||||
|
@ -4732,6 +4756,72 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
|
|||
}
|
||||
}
|
||||
}
|
||||
else if (SCALAR_INT_MODE_P (bestn->simdclone->mask_mode))
|
||||
{
|
||||
atype = bestn->simdclone->args[i].vector_type;
|
||||
/* Guess the number of lanes represented by atype. */
|
||||
unsigned HOST_WIDE_INT atype_subparts
|
||||
= exact_div (bestn->simdclone->simdlen,
|
||||
num_mask_args).to_constant ();
|
||||
o = vector_unroll_factor (nunits, atype_subparts);
|
||||
for (m = j * o; m < (j + 1) * o; m++)
|
||||
{
|
||||
if (m == 0)
|
||||
{
|
||||
if (!slp_node)
|
||||
vect_get_vec_defs_for_operand (vinfo, stmt_info,
|
||||
o * ncopies,
|
||||
op,
|
||||
&vec_oprnds[i]);
|
||||
vec_oprnds_i[i] = 0;
|
||||
}
|
||||
if (atype_subparts
|
||||
< simd_clone_subparts (arginfo[i].vectype))
|
||||
{
|
||||
/* The mask argument has fewer elements than the
|
||||
input vector. */
|
||||
/* FORNOW */
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else if (atype_subparts
|
||||
== simd_clone_subparts (arginfo[i].vectype))
|
||||
{
|
||||
/* The vector mask argument matches the input
|
||||
in the number of lanes, but not necessarily
|
||||
in the mode. */
|
||||
vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++];
|
||||
tree st = lang_hooks.types.type_for_mode
|
||||
(TYPE_MODE (TREE_TYPE (vec_oprnd0)), 1);
|
||||
vec_oprnd0 = build1 (VIEW_CONVERT_EXPR, st,
|
||||
vec_oprnd0);
|
||||
gassign *new_stmt
|
||||
= gimple_build_assign (make_ssa_name (st),
|
||||
vec_oprnd0);
|
||||
vect_finish_stmt_generation (vinfo, stmt_info,
|
||||
new_stmt, gsi);
|
||||
if (!types_compatible_p (atype, st))
|
||||
{
|
||||
new_stmt
|
||||
= gimple_build_assign (make_ssa_name (atype),
|
||||
NOP_EXPR,
|
||||
gimple_assign_lhs
|
||||
(new_stmt));
|
||||
vect_finish_stmt_generation (vinfo, stmt_info,
|
||||
new_stmt, gsi);
|
||||
}
|
||||
vargs.safe_push (gimple_assign_lhs (new_stmt));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* The mask argument has more elements than the
|
||||
input vector. */
|
||||
/* FORNOW */
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
gcc_unreachable ();
|
||||
break;
|
||||
case SIMD_CLONE_ARG_TYPE_UNIFORM:
|
||||
vargs.safe_push (op);
|
||||
|
|
Loading…
Add table
Reference in a new issue