Fix PEELING_FOR_NITERS calculation (PR 87288)

PEELING_FOR_GAPS now means "peel one iteration for the epilogue",
in much the same way that PEELING_FOR_ALIGNMENT > 0 means
"peel that number of iterations for the prologue".  We weren't
taking this into account when deciding whether we needed to peel
further scalar iterations beyond the iterations for "gaps" and
"alignment".

Only the first test failed before the patch.  The other two
are just for completeness.

2018-09-20  Richard Sandiford  <richard.sandiford@arm.com>

gcc/
	PR tree-optimization/87288
	* tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
	into account when determining PEELING_FOR_NITERS.

gcc/testsuite/
	PR tree-optimization/87288
	* gcc.dg/vect/pr87288-1.c: New test.
	* gcc.dg/vect/pr87288-2.c: Likewise.
	* gcc.dg/vect/pr87288-3.c: Likewise.

From-SVN: r264440
This commit is contained in:
Richard Sandiford 2018-09-20 12:58:23 +00:00 committed by Richard Sandiford
parent 508a909eca
commit 2d2ee18641
6 changed files with 201 additions and 3 deletions

View file

@ -1,3 +1,9 @@
2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/87288
* tree-vect-loop.c (vect_analyze_loop_2): Take PEELING_FOR_GAPS
into account when determining PEELING_FOR_NITERS.
2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/86877

View file

@ -1,3 +1,10 @@
2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/87288
* gcc.dg/vect/pr87288-1.c: New test.
* gcc.dg/vect/pr87288-2.c: Likewise.
* gcc.dg/vect/pr87288-3.c: Likewise.
2018-09-20 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/86877

View file

@ -0,0 +1,49 @@
#include "tree-vect.h"
#define N (VECTOR_BITS / 32)
#define MAX_COUNT 4
/* Write COUNT * N pairs of elements to A.  For pair I, the even element
   A[I * 2] receives B[I * 2] + COUNT and the odd element A[I * 2 + 1]
   receives COUNT.  Only the even elements of B are read.  The trip
   count depends on the runtime value of COUNT.

   noipa is the GCC attribute that disables interprocedural optimization
   between this function and its callers.  */
void __attribute__ ((noipa))
run (int *restrict a, int *restrict b, int count)
{
for (int i = 0; i < count * N; ++i)
{
a[i * 2] = b[i * 2] + count;
a[i * 2 + 1] = count;
}
}
/* Verify the result of run (A, B, COUNT), given that main initialized
   B[I * 2] to I * 41.  Abort if any of the COUNT * N pairs in A holds an
   unexpected value, or if the sentinel stored in A[COUNT * 2 * N] (the
   first element after the last pair) is no longer 999.  */
void __attribute__ ((noipa))
check (int *restrict a, int count)
{
for (int i = 0; i < count * N; ++i)
if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
__builtin_abort ();
/* The element immediately after the written region must be untouched.  */
if (a[count * 2 * N] != 999)
__builtin_abort ();
}
/* A has one element more than the largest region run () writes, to hold
   the sentinel that check () inspects.  */
int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
/* Test driver: initialize the even elements of B, then for every COUNT
   from 0 to MAX_COUNT inclusive plant a sentinel, call run () and
   validate the result with check ().  */
int
main (void)
{
/* check_vect comes from tree-vect.h, which is not shown here.
   NOTE(review): presumably it bails out if the target cannot execute
   the vector code -- confirm against tree-vect.h.  */
check_vect ();
for (int i = 0; i < N * MAX_COUNT; ++i)
{
b[i * 2] = i * 41;
/* Empty asm with a "memory" clobber.  NOTE(review): presumably here to
   stop this setup loop from being vectorized, since the dg-final
   directive expects exactly one vectorized loop -- confirm.  */
asm volatile ("" ::: "memory");
}
for (int i = 0; i <= MAX_COUNT; ++i)
{
/* Sentinel just past the last element run () writes for this count.  */
a[i * 2 * N] = 999;
run (a, b, i);
check (a, i);
}
return 0;
}
/* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 1 "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */

View file

@ -0,0 +1,64 @@
#include "tree-vect.h"
#define N (VECTOR_BITS / 32)
#define MAX_COUNT 4
/* Define a function run_<COUNT> that writes N * COUNT pairs of elements
   to A.  For pair I, A[I * 2] receives B[I * 2] + COUNT and A[I * 2 + 1]
   receives COUNT.  Only the even elements of B are read.  Because COUNT
   is a macro argument, the trip count N * COUNT is a compile-time
   constant in each instantiation.  */
#define RUN_COUNT(COUNT) \
void __attribute__ ((noipa)) \
run_##COUNT (int *restrict a, int *restrict b) \
{ \
for (int i = 0; i < N * COUNT; ++i) \
{ \
a[i * 2] = b[i * 2] + COUNT; \
a[i * 2 + 1] = COUNT; \
} \
}
/* Instantiate run_1 ... run_4, one per count up to MAX_COUNT.  */
RUN_COUNT (1)
RUN_COUNT (2)
RUN_COUNT (3)
RUN_COUNT (4)
/* Verify the result of run_<COUNT> (A, B), given that main initialized
   B[I * 2] to I * 41.  Abort if any of the COUNT * N pairs in A holds an
   unexpected value, or if the sentinel stored in A[COUNT * 2 * N] (the
   first element after the last pair) is no longer 999.  */
void __attribute__ ((noipa))
check (int *restrict a, int count)
{
for (int i = 0; i < count * N; ++i)
if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
__builtin_abort ();
/* The element immediately after the written region must be untouched.  */
if (a[count * 2 * N] != 999)
__builtin_abort ();
}
/* A has one element more than the largest region run_4 writes, to hold
   the sentinel that check () inspects.  */
int a[N * MAX_COUNT * 2 + 1], b[N * MAX_COUNT * 2];
/* Test driver: initialize the even elements of B, then for each of
   run_1 ... run_4 plant a sentinel, call the function and validate the
   result with check ().  */
int
main (void)
{
/* check_vect comes from tree-vect.h, which is not shown here.
   NOTE(review): presumably it bails out if the target cannot execute
   the vector code -- confirm against tree-vect.h.  */
check_vect ();
for (int i = 0; i < N * MAX_COUNT; ++i)
{
b[i * 2] = i * 41;
/* Empty asm with a "memory" clobber.  NOTE(review): presumably here to
   stop this setup loop from being vectorized -- confirm.  */
asm volatile ("" ::: "memory");
}
/* Each sentinel index N * 2 * COUNT matches the A[COUNT * 2 * N] element
   that check () inspects for that count.  */
a[N * 2] = 999;
run_1 (a, b);
check (a, 1);
a[N * 4] = 999;
run_2 (a, b);
check (a, 2);
a[N * 6] = 999;
run_3 (a, b);
check (a, 3);
a[N * 8] = 999;
run_4 (a, b);
check (a, 4);
return 0;
}
/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */

View file

@ -0,0 +1,64 @@
#include "tree-vect.h"
#define N (VECTOR_BITS / 32)
#define MAX_COUNT 4
/* Define a function run_<COUNT> that writes N * COUNT + 1 pairs of
   elements to A.  For pair I, A[I * 2] receives B[I * 2] + COUNT and
   A[I * 2 + 1] receives COUNT.  Only the even elements of B are read.
   Because COUNT is a macro argument, the trip count N * COUNT + 1 is a
   compile-time constant in each instantiation.  */
#define RUN_COUNT(COUNT) \
void __attribute__ ((noipa)) \
run_##COUNT (int *restrict a, int *restrict b) \
{ \
for (int i = 0; i < N * COUNT + 1; ++i) \
{ \
a[i * 2] = b[i * 2] + COUNT; \
a[i * 2 + 1] = COUNT; \
} \
}
/* Instantiate run_1 ... run_4, one per count up to MAX_COUNT.  */
RUN_COUNT (1)
RUN_COUNT (2)
RUN_COUNT (3)
RUN_COUNT (4)
/* Verify the result of run_<COUNT> (A, B), given that main initialized
   B[I * 2] to I * 41.  Abort if any of the COUNT * N + 1 pairs in A
   holds an unexpected value, or if the sentinel stored in
   A[COUNT * 2 * N + 2] (the first element after the last pair) is no
   longer 999.  */
void __attribute__ ((noipa))
check (int *restrict a, int count)
{
for (int i = 0; i < count * N + 1; ++i)
if (a[i * 2] != i * 41 + count || a[i * 2 + 1] != count)
__builtin_abort ();
/* The element immediately after the written region must be untouched.  */
if (a[count * 2 * N + 2] != 999)
__builtin_abort ();
}
/* Each array has room for the N * MAX_COUNT + 1 pairs handled by run_4.
   A has one further element to hold the sentinel that check () inspects.  */
int a[N * MAX_COUNT * 2 + 3], b[N * MAX_COUNT * 2 + 2];
/* Test driver: initialize the even elements of B, then for each of
   run_1 ... run_4 plant a sentinel, call the function and validate the
   result with check ().  */
int
main (void)
{
/* check_vect comes from tree-vect.h, which is not shown here.
   NOTE(review): presumably it bails out if the target cannot execute
   the vector code -- confirm against tree-vect.h.  */
check_vect ();
for (int i = 0; i < N * MAX_COUNT + 1; ++i)
{
b[i * 2] = i * 41;
/* Empty asm with a "memory" clobber.  NOTE(review): presumably here to
   stop this setup loop from being vectorized -- confirm.  */
asm volatile ("" ::: "memory");
}
/* Each sentinel index N * 2 * COUNT + 2 matches the A[COUNT * 2 * N + 2]
   element that check () inspects for that count.  */
a[N * 2 + 2] = 999;
run_1 (a, b);
check (a, 1);
a[N * 4 + 2] = 999;
run_2 (a, b);
check (a, 2);
a[N * 6 + 2] = 999;
run_3 (a, b);
check (a, 3);
a[N * 8 + 2] = 999;
run_4 (a, b);
check (a, 4);
return 0;
}
/* { dg-final { scan-tree-dump {LOOP VECTORIZED} "vect" { target { { vect_int && vect_perm } && vect_element_align } } } } */

View file

@ -2074,14 +2074,22 @@ start_over:
/* The main loop handles all iterations. */
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
{
if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo)
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo),
/* Work out the (constant) number of iterations that need to be
peeled for reasons other than niters. */
unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
peel_niter += 1;
if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
}
else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
/* ??? When peeling for gaps but not alignment, we could
try to check whether the (variable) niters is known to be
VF * N + 1. That's something of a niche case though. */
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
|| !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
|| ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
< (unsigned) exact_log2 (const_vf))