fix detection of negative step DR groups

This fixes a condition that caused all negative step DR groups to
be detected as single element interleaving.  Such groups are
rejected by interleaving vectorization but were miscompiled by SLP;
the miscompilation is avoided by forcing VMAT_STRIDED_SLP for now.

2020-07-07  Richard Biener  <rguenther@suse.de>

	* tree-vect-data-refs.c (vect_analyze_data_ref_accesses): Fix
	group overlap condition to allow negative step DR groups.
	* tree-vect-stmts.c (get_group_load_store_type): For
	multi element SLP groups force VMAT_STRIDED_SLP when the step
	is negative.

	* gcc.dg/vect/slp-47.c: New testcase.
	* gcc.dg/vect/slp-48.c: Likewise.
This commit is contained in:
Richard Biener 2020-07-07 13:57:40 +02:00
parent e1b0956a8e
commit f75211822f
4 changed files with 126 additions and 5 deletions

View file

@ -0,0 +1,56 @@
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
int x[1024], y[1024];
/* Copy Y into X fully reversed: each iteration stores two adjacent
   elements of X (ascending addresses) that are loaded from two adjacent
   elements of Y walked backwards, with the order inside the pair swapped
   as well.  The net effect is x[j] == y[1023 - j] for every j.
   noipa keeps the function out of inlining and other interprocedural
   optimization, so the loop is compiled on its own.  */
void __attribute__((noipa)) foo()
{
for (int i = 0; i < 512; ++i)
{
/* Even slot of X takes the higher-addressed element of the Y pair.  */
x[2*i] = y[1023 - (2*i)];
/* Odd slot of X takes the lower-addressed element of the Y pair.  */
x[2*i+1] = y[1023 - (2*i+1)];
}
}
/* Copy Y into X with the pairs visited in reverse order but the two
   elements inside each pair kept in their original order: the load index
   still decreases as I grows, yet x[2*i] takes the lower-addressed and
   x[2*i+1] the higher-addressed element of the Y pair.  The net effect
   is x[j] == y[(1023 - j) ^ 1] for every j.
   noipa keeps the function out of inlining and other interprocedural
   optimization, so the loop is compiled on its own.  */
void __attribute__((noipa)) bar()
{
for (int i = 0; i < 512; ++i)
{
/* 1023 - (2*i+1) == 1022 - 2*i: the lower element of the Y pair.  */
x[2*i] = y[1023 - (2*i+1)];
/* 1023 - 2*i: the higher element of the Y pair.  */
x[2*i+1] = y[1023 - (2*i)];
}
}
/* Test driver: fill Y with its own indices, run each kernel on a zeroed
   X and abort as soon as one element of X differs from the permutation
   of Y that the kernel is expected to produce.  Returns 0 on success.  */
int
main ()
{
  /* Helper from tree-vect.h; its definition is not visible here
     (presumably it verifies that the host can run vector code).  */
  check_vect ();

  /* The empty volatile asm is part of the original setup loops and is
     kept as is; presumably it keeps these loops from being vectorized
     so they do not disturb the dump scan -- TODO confirm.  */
  for (int i = 0; i < 1024; ++i)
    {
      x[i] = 0;
      y[i] = i;
      __asm__ volatile ("");
    }

  /* foo reverses Y completely.  */
  foo ();
  for (int i = 0; i < 1024; ++i)
    if (x[i] != y[1023 - i])
      abort ();

  /* Clear X again so stale results from foo cannot mask a failure.  */
  for (int i = 0; i < 1024; ++i)
    {
      x[i] = 0;
      __asm__ volatile ("");
    }

  /* bar reverses the order of the pairs but not the order inside a pair.
     The grouping is parenthesized explicitly because binary `-' binds
     tighter than `^', which is easy to misread.  */
  bar ();
  for (int i = 0; i < 1024; ++i)
    if (x[i] != y[(1023 - i) ^ 1])
      abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */

View file

@ -0,0 +1,56 @@
/* { dg-require-effective-target vect_int } */
#include "tree-vect.h"
int x[1024], y[1024];
/* Copy Y into X with the pairs placed in reverse order but the two
   elements inside each pair kept in their original order: Y is read at
   ascending addresses while the store index into X decreases as I
   grows.  The net effect is x[j] == y[(1023 - j) ^ 1] for every j.
   noipa keeps the function out of inlining and other interprocedural
   optimization, so the loop is compiled on its own.  */
void __attribute__((noipa)) foo()
{
for (int i = 0; i < 512; ++i)
{
/* 1023 - (2*i+1) == 1022 - 2*i: the lower element of the X pair.  */
x[1023 - (2*i+1)] = y[2*i];
/* 1023 - 2*i: the higher element of the X pair.  */
x[1023 - (2*i)] = y[2*i+1];
}
}
/* Copy Y into X fully reversed: Y is read at ascending addresses while
   the store index into X decreases as I grows, and the two elements of
   each pair are swapped as well.  The net effect is
   x[j] == y[1023 - j] for every j.
   noipa keeps the function out of inlining and other interprocedural
   optimization, so the loop is compiled on its own.  */
void __attribute__((noipa)) bar()
{
for (int i = 0; i < 512; ++i)
{
/* Lower element of the X pair takes the odd (higher) element of Y.  */
x[1023 - (2*i+1)] = y[2*i+1];
/* Higher element of the X pair takes the even (lower) element of Y.  */
x[1023 - (2*i)] = y[2*i];
}
}
/* Test driver: fill Y with its own indices, run each kernel on a zeroed
   X and abort as soon as one element of X differs from the permutation
   of Y that the kernel is expected to produce.  Returns 0 on success.  */
int
main ()
{
  /* Helper from tree-vect.h; its definition is not visible here
     (presumably it verifies that the host can run vector code).  */
  check_vect ();

  /* The empty volatile asm is part of the original setup loops and is
     kept as is; presumably it keeps these loops from being vectorized
     so they do not disturb the dump scan -- TODO confirm.  */
  for (int i = 0; i < 1024; ++i)
    {
      x[i] = 0;
      y[i] = i;
      __asm__ volatile ("");
    }

  /* foo reverses the order of the pairs but not the order inside a pair.
     The grouping is parenthesized explicitly because binary `-' binds
     tighter than `^', which is easy to misread.  */
  foo ();
  for (int i = 0; i < 1024; ++i)
    if (x[i] != y[(1023 - i) ^ 1])
      abort ();

  /* Clear X again so stale results from foo cannot mask a failure.  */
  for (int i = 0; i < 1024; ++i)
    {
      x[i] = 0;
      __asm__ volatile ("");
    }

  /* bar reverses Y completely.  */
  bar ();
  for (int i = 0; i < 1024; ++i)
    if (x[i] != y[1023 - i])
      abort ();

  return 0;
}
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" } } */

View file

@ -3074,13 +3074,15 @@ vect_analyze_data_ref_accesses (vec_info *vinfo)
if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
break;
/* If the step (if not zero or non-constant) is greater than the
/* If the step (if not zero or non-constant) is smaller than the
difference between data-refs' inits this splits groups into
suitable sizes. */
if (tree_fits_shwi_p (DR_STEP (dra)))
{
HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
if (step != 0 && step <= (init_b - init_a))
unsigned HOST_WIDE_INT step
= absu_hwi (tree_to_shwi (DR_STEP (dra)));
if (step != 0
&& step <= (unsigned HOST_WIDE_INT)(init_b - init_a))
break;
}
}

View file

@ -2150,8 +2150,15 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info,
}
int cmp = compare_step_with_zero (vinfo, stmt_info);
if (cmp < 0)
*memory_access_type = get_negative_load_store_type
(vinfo, stmt_info, vectype, vls_type, 1);
{
if (single_element_p)
/* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
only correct for single element "interleaving" SLP. */
*memory_access_type = get_negative_load_store_type
(vinfo, stmt_info, vectype, vls_type, 1);
else
*memory_access_type = VMAT_STRIDED_SLP;
}
else
{
gcc_assert (!loop_vinfo || cmp > 0);