Support non-masked epilogue vectorization
gcc/ 2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com> * params.def (PARAM_VECT_EPILOGUES_NOMASK): New. * tree-if-conv.c (tree_if_conversion): Make public. * tree-if-conv.h: New file. * tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Avoid dynamic alias checks for epilogues. * tree-vect-loop-manip.c (vect_do_peeling): Return created epilog. * tree-vect-loop.c: include tree-if-conv.h. (new_loop_vec_info): Add zeroing orig_loop_info field. (vect_analyze_loop_2): Don't try to enhance alignment for epilogues. (vect_analyze_loop): Add argument ORIG_LOOP_INFO which is not NULL if epilogue is vectorized, set up orig_loop_info field of loop_vinfo using passed argument. (vect_transform_loop): Check if created epilogue should be returned for further vectorization with less vf. If-convert epilogue if required. Print vectorization success for epilogue. * tree-vectorizer.c (vectorize_loops): Add epilogue vectorization if it is required, pass loop_vinfo produced during vectorization of loop body to vect_analyze_loop. * tree-vectorizer.h (struct _loop_vec_info): Add new field orig_loop_info. (LOOP_VINFO_ORIG_LOOP_INFO): New. (LOOP_VINFO_EPILOGUE_P): New. (LOOP_VINFO_ORIG_VECT_FACTOR): New. (vect_do_peeling): Change prototype to return epilogue. (vect_analyze_loop): Add argument of loop_vec_info type. (vect_transform_loop): Return created loop. gcc/testsuite/ 2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com> * lib/target-supports.exp (check_avx2_hw_available): New. (check_effective_target_avx2_runtime): New. * gcc.dg/vect/vect-tail-nomask-1.c: New test. From-SVN: r242501
This commit is contained in:
parent
03b85dcd48
commit
598eaaa2a2
12 changed files with 340 additions and 33 deletions
|
@ -1,3 +1,32 @@
|
|||
2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com>
|
||||
|
||||
* params.def (PARAM_VECT_EPILOGUES_NOMASK): New.
|
||||
* tree-if-conv.c (tree_if_conversion): Make public.
|
||||
* tree-if-conv.h: New file.
|
||||
* tree-vect-data-refs.c (vect_analyze_data_ref_dependences): Avoid
|
||||
dynamic alias checks for epilogues.
|
||||
* tree-vect-loop-manip.c (vect_do_peeling): Return created epilog.
|
||||
* tree-vect-loop.c: include tree-if-conv.h.
|
||||
(new_loop_vec_info): Add zeroing orig_loop_info field.
|
||||
(vect_analyze_loop_2): Don't try to enhance alignment for epilogues.
|
||||
(vect_analyze_loop): Add argument ORIG_LOOP_INFO which is not NULL
|
||||
if epilogue is vectorized, set up orig_loop_info field of loop_vinfo
|
||||
using passed argument.
|
||||
(vect_transform_loop): Check if created epilogue should be returned
|
||||
for further vectorization with less vf. If-convert epilogue if
|
||||
required. Print vectorization success for epilogue.
|
||||
* tree-vectorizer.c (vectorize_loops): Add epilogue vectorization
|
||||
if it is required, pass loop_vinfo produced during vectorization of
|
||||
loop body to vect_analyze_loop.
|
||||
* tree-vectorizer.h (struct _loop_vec_info): Add new field
|
||||
orig_loop_info.
|
||||
(LOOP_VINFO_ORIG_LOOP_INFO): New.
|
||||
(LOOP_VINFO_EPILOGUE_P): New.
|
||||
(LOOP_VINFO_ORIG_VECT_FACTOR): New.
|
||||
(vect_do_peeling): Change prototype to return epilogue.
|
||||
(vect_analyze_loop): Add argument of loop_vec_info type.
|
||||
(vect_transform_loop): Return created loop.
|
||||
|
||||
2016-11-16 Segher Boessenkool <segher@kernel.crashing.org>
|
||||
|
||||
* config/rs6000/rs6000.c (rs6000_components_for_bb): Mark the LR
|
||||
|
|
|
@ -1270,6 +1270,11 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTIONS,
|
|||
"edge of a switch statement during VRP",
|
||||
10, 0, 0)
|
||||
|
||||
DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK,
|
||||
"vect-epilogues-nomask",
|
||||
"Enable loop epilogue vectorization using smaller vector size.",
|
||||
0, 0, 1)
|
||||
|
||||
/*
|
||||
|
||||
Local variables:
|
||||
|
|
|
@ -1,3 +1,9 @@
|
|||
2016-11-16 Yuri Rumyantsev <ysrumyan@gmail.com>
|
||||
|
||||
* lib/target-supports.exp (check_avx2_hw_available): New.
|
||||
(check_effective_target_avx2_runtime): New.
|
||||
* gcc.dg/vect/vect-tail-nomask-1.c: New test.
|
||||
|
||||
2016-11-16 Tamar Christina <tamar.christina@arm.com>
|
||||
|
||||
PR testsuite/78136
|
||||
|
|
106
gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
Normal file
106
gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c
Normal file
|
@ -0,0 +1,106 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-weak "" } */
|
||||
/* { dg-additional-options "--param vect-epilogues-nomask=1 -mavx2" { target avx2_runtime } } */
|
||||
|
||||
#define SIZE 1023
|
||||
#define ALIGN 64
|
||||
|
||||
extern int posix_memalign(void **memptr, __SIZE_TYPE__ alignment, __SIZE_TYPE__ size) __attribute__((weak));
|
||||
extern void free (void *);
|
||||
|
||||
void __attribute__((noinline))
|
||||
test_citer (int * __restrict__ a,
|
||||
int * __restrict__ b,
|
||||
int * __restrict__ c)
|
||||
{
|
||||
int i;
|
||||
|
||||
a = (int *)__builtin_assume_aligned (a, ALIGN);
|
||||
b = (int *)__builtin_assume_aligned (b, ALIGN);
|
||||
c = (int *)__builtin_assume_aligned (c, ALIGN);
|
||||
|
||||
for (i = 0; i < SIZE; i++)
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
void __attribute__((noinline))
|
||||
test_viter (int * __restrict__ a,
|
||||
int * __restrict__ b,
|
||||
int * __restrict__ c,
|
||||
int size)
|
||||
{
|
||||
int i;
|
||||
|
||||
a = (int *)__builtin_assume_aligned (a, ALIGN);
|
||||
b = (int *)__builtin_assume_aligned (b, ALIGN);
|
||||
c = (int *)__builtin_assume_aligned (c, ALIGN);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
/* Initialize the test arrays: for every i in [0, SIZE) set a[i] = i,
   b[i] = -i and c[i] = 0, then write SIZE into element SIZE of each
   array as a sentinel.  Every array must therefore provide room for at
   least SIZE + 1 elements.  */
void __attribute__((noinline))
init_data (int * __restrict__ a,
           int * __restrict__ b,
           int * __restrict__ c,
           int size)
{
  for (int i = 0; i < size; i++)
    {
      a[i] = i;
      b[i] = -i;
      c[i] = 0;
      /* Empty statement with a memory clobber; presumably here so that
         this loop is not itself vectorized and counted by the dg-final
         scans.  Spelled __asm__ because plain `asm' is not a keyword
         in strict ISO modes such as -std=c99 or -std=c11.  */
      __asm__ __volatile__ ("" : : : "memory");
    }

  /* Sentinel one past the processed range.  */
  a[size] = b[size] = c[size] = size;
}
|
||||
|
||||
|
||||
void __attribute__((noinline))
|
||||
run_test ()
|
||||
{
|
||||
int *a;
|
||||
int *b;
|
||||
int *c;
|
||||
int i;
|
||||
|
||||
if (posix_memalign ((void **)&a, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
|
||||
return;
|
||||
if (posix_memalign ((void **)&b, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
|
||||
return;
|
||||
if (posix_memalign ((void **)&c, ALIGN, (SIZE + 1) * sizeof (int)) != 0)
|
||||
return;
|
||||
|
||||
init_data (a, b, c, SIZE);
|
||||
test_citer (a, b, c);
|
||||
for (i = 0; i < SIZE; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
__builtin_abort ();
|
||||
if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
|
||||
__builtin_abort ();
|
||||
|
||||
init_data (a, b, c, SIZE);
|
||||
test_viter (a, b, c, SIZE);
|
||||
for (i = 0; i < SIZE; i++)
|
||||
if (c[i] != a[i] + b[i])
|
||||
__builtin_abort ();
|
||||
if (a[SIZE] != SIZE || b[SIZE] != SIZE || c[SIZE] != SIZE)
|
||||
__builtin_abort ();
|
||||
|
||||
free (a);
|
||||
free (b);
|
||||
free (c);
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, const char **argv)
|
||||
{
|
||||
if (!posix_memalign)
|
||||
return 0;
|
||||
|
||||
run_test ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target avx2_runtime } } } */
|
||||
/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */
|
|
@ -1730,6 +1730,36 @@ proc check_avx_hw_available { } {
|
|||
}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports executing AVX2 instructions, 0
# otherwise.  Cache the result.
#
# On x86 targets a small probe program is built and run.  It exits
# with status 0 only when CPUID leaf 1 reports OSXSAVE, CPUID leaf 7
# exists, and leaf 7 subleaf 0 reports AVX2.  On every other target
# the answer is 0 without running anything.

proc check_avx2_hw_available { } {
    return [check_cached_effective_target avx2_hw_available {
        # If this is not the right target then we can skip the test.
        if { !([istarget x86_64-*-*] || [istarget i?86-*-*]) } {
            expr 0
        } else {
            check_runtime_nocache avx2_hw_available {
                /* stddef.h supplies NULL, which cpuid.h does not define.  */
                #include <stddef.h>
                #include "cpuid.h"
                int main ()
                {
                  unsigned int eax, ebx, ecx, edx;
                  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx)
                      || ((ecx & bit_OSXSAVE) != bit_OSXSAVE))
                    return 1;

                  if (__get_cpuid_max (0, NULL) < 7)
                    return 1;

                  __cpuid_count (7, 0, eax, ebx, ecx, edx);

                  return (ebx & bit_AVX2) != bit_AVX2;
                }
            } ""
        }
    }]
}
|
||||
|
||||
# Return 1 if the target supports running SSE executables, 0 otherwise.
|
||||
|
||||
proc check_effective_target_sse_runtime { } {
|
||||
|
@ -1805,6 +1835,17 @@ proc check_effective_target_avx_runtime { } {
|
|||
return 0
|
||||
}
|
||||
|
||||
# Return 1 if the target supports running AVX2 executables, 0 otherwise.
#
# Three checks are made in order, stopping at the first that fails:
# check_effective_target_avx2, check_avx2_hw_available and
# check_avx_os_support_available.

proc check_effective_target_avx2_runtime { } {
    if { ![check_effective_target_avx2] } {
        return 0
    }
    if { ![check_avx2_hw_available] } {
        return 0
    }
    if { ![check_avx_os_support_available] } {
        return 0
    }
    return 1
}
|
||||
|
||||
# Return 1 if we are compiling for 64-bit PowerPC but we do not use direct
|
||||
# move instructions for moves from GPR to FPR.
|
||||
|
||||
|
|
|
@ -2734,7 +2734,7 @@ ifcvt_local_dce (basic_block bb)
|
|||
profitability analysis. Returns non-zero todo flags when something
|
||||
changed. */
|
||||
|
||||
static unsigned int
|
||||
unsigned int
|
||||
tree_if_conversion (struct loop *loop)
|
||||
{
|
||||
unsigned int todo = 0;
|
||||
|
|
24
gcc/tree-if-conv.h
Normal file
24
gcc/tree-if-conv.h
Normal file
|
@ -0,0 +1,24 @@
|
|||
/* Copyright (C) 2016 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GCC_TREE_IF_CONV_H
|
||||
#define GCC_TREE_IF_CONV_H
|
||||
|
||||
unsigned int tree_if_conversion (struct loop *);
|
||||
|
||||
#endif /* GCC_TREE_IF_CONV_H */
|
|
@ -480,9 +480,15 @@ vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
|
|||
LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
|
||||
return false;
|
||||
|
||||
FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
|
||||
if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
|
||||
return false;
|
||||
/* For epilogues we either have no aliases or alias versioning
|
||||
was applied to original loop. Therefore we may just get max_vf
|
||||
using VF of original loop. */
|
||||
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
|
||||
*max_vf = LOOP_VINFO_ORIG_VECT_FACTOR (loop_vinfo);
|
||||
else
|
||||
FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
|
||||
if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1614,11 +1614,13 @@ slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
|
|||
|
||||
Note this function peels prolog and epilog only if it's necessary,
|
||||
as well as guards.
|
||||
Returns created epilogue or NULL.
|
||||
|
||||
TODO: Guard for prefer_scalar_loop should be emitted along with
|
||||
versioning conditions if loop versioning is needed. */
|
||||
|
||||
void
|
||||
|
||||
struct loop *
|
||||
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
||||
tree *niters_vector, int th, bool check_profitability,
|
||||
bool niters_no_overflow)
|
||||
|
@ -1634,7 +1636,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|||
|| LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
|
||||
|
||||
if (!prolog_peeling && !epilog_peeling)
|
||||
return;
|
||||
return NULL;
|
||||
|
||||
prob_vector = 9 * REG_BR_PROB_BASE / 10;
|
||||
if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
|
||||
|
@ -1642,7 +1644,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|||
prob_prolog = prob_epilog = (vf - 1) * REG_BR_PROB_BASE / vf;
|
||||
vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
|
||||
|
||||
struct loop *prolog, *epilog, *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
struct loop *first_loop = loop;
|
||||
create_lcssa_for_virtual_phi (loop);
|
||||
update_ssa (TODO_update_ssa_only_virtuals);
|
||||
|
@ -1824,6 +1826,8 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
|
|||
}
|
||||
adjust_vec.release ();
|
||||
free_original_copy_tables ();
|
||||
|
||||
return epilog;
|
||||
}
|
||||
|
||||
/* Function vect_create_cond_for_niters_checks.
|
||||
|
|
|
@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "gimple-fold.h"
|
||||
#include "cgraph.h"
|
||||
#include "tree-cfg.h"
|
||||
#include "tree-if-conv.h"
|
||||
|
||||
/* Loop Vectorization Pass.
|
||||
|
||||
|
@ -1171,6 +1172,7 @@ new_loop_vec_info (struct loop *loop)
|
|||
LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
|
||||
LOOP_VINFO_PEELING_FOR_NITER (res) = false;
|
||||
LOOP_VINFO_OPERANDS_SWAPPED (res) = false;
|
||||
LOOP_VINFO_ORIG_LOOP_INFO (res) = NULL;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
@ -2046,15 +2048,20 @@ start_over:
|
|||
if (!ok)
|
||||
return false;
|
||||
|
||||
/* This pass will decide on using loop versioning and/or loop peeling in
|
||||
order to enhance the alignment of data references in the loop. */
|
||||
ok = vect_enhance_data_refs_alignment (loop_vinfo);
|
||||
if (!ok)
|
||||
/* Do not invoke vect_enhance_data_refs_alignment for epilogue
|
||||
vectorization. */
|
||||
if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"bad data alignment.\n");
|
||||
return false;
|
||||
/* This pass will decide on using loop versioning and/or loop peeling in
|
||||
order to enhance the alignment of data references in the loop. */
|
||||
ok = vect_enhance_data_refs_alignment (loop_vinfo);
|
||||
if (!ok)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"bad data alignment.\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (slp)
|
||||
|
@ -2308,9 +2315,10 @@ again:
|
|||
|
||||
Apply a set of analyses on LOOP, and create a loop_vec_info struct
|
||||
for it. The different analyses will record information in the
|
||||
loop_vec_info struct. */
|
||||
loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must
|
||||
be vectorized. */
|
||||
loop_vec_info
|
||||
vect_analyze_loop (struct loop *loop)
|
||||
vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
|
||||
{
|
||||
loop_vec_info loop_vinfo;
|
||||
unsigned int vector_sizes;
|
||||
|
@ -2346,6 +2354,10 @@ vect_analyze_loop (struct loop *loop)
|
|||
}
|
||||
|
||||
bool fatal = false;
|
||||
|
||||
if (orig_loop_vinfo)
|
||||
LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
|
||||
|
||||
if (vect_analyze_loop_2 (loop_vinfo, fatal))
|
||||
{
|
||||
LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
|
||||
|
@ -6696,12 +6708,14 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo)
|
|||
|
||||
The analysis phase has determined that the loop is vectorizable.
|
||||
Vectorize the loop - created vectorized stmts to replace the scalar
|
||||
stmts in the loop, and update the loop exit condition. */
|
||||
stmts in the loop, and update the loop exit condition.
|
||||
Returns scalar epilogue loop if any. */
|
||||
|
||||
void
|
||||
struct loop *
|
||||
vect_transform_loop (loop_vec_info loop_vinfo)
|
||||
{
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
struct loop *epilogue = NULL;
|
||||
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
|
||||
int nbbs = loop->num_nodes;
|
||||
int i;
|
||||
|
@ -6780,8 +6794,8 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|||
LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
|
||||
tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
|
||||
bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
|
||||
vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
|
||||
check_profitability, niters_no_overflow);
|
||||
epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th,
|
||||
check_profitability, niters_no_overflow);
|
||||
if (niters_vector == NULL_TREE)
|
||||
{
|
||||
if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
|
||||
|
@ -7065,12 +7079,19 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|||
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"LOOP VECTORIZED\n");
|
||||
if (loop->inner)
|
||||
if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"LOOP VECTORIZED\n");
|
||||
if (loop->inner)
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"OUTER LOOP VECTORIZED\n");
|
||||
dump_printf (MSG_NOTE, "\n");
|
||||
}
|
||||
else
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"OUTER LOOP VECTORIZED\n");
|
||||
dump_printf (MSG_NOTE, "\n");
|
||||
"LOOP EPILOGUE VECTORIZED (VS=%d)\n",
|
||||
current_vector_size);
|
||||
}
|
||||
|
||||
/* Free SLP instances here because otherwise stmt reference counting
|
||||
|
@ -7082,6 +7103,49 @@ vect_transform_loop (loop_vec_info loop_vinfo)
|
|||
/* Clear-up safelen field since its value is invalid after vectorization
|
||||
since vectorized loop can have loop-carried dependencies. */
|
||||
loop->safelen = 0;
|
||||
|
||||
/* Don't vectorize epilogue for epilogue. */
|
||||
if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
|
||||
epilogue = NULL;
|
||||
|
||||
if (epilogue)
|
||||
{
|
||||
unsigned int vector_sizes
|
||||
= targetm.vectorize.autovectorize_vector_sizes ();
|
||||
vector_sizes &= current_vector_size - 1;
|
||||
|
||||
if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
|
||||
epilogue = NULL;
|
||||
else if (!vector_sizes)
|
||||
epilogue = NULL;
|
||||
else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
|
||||
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
|
||||
{
|
||||
int smallest_vec_size = 1 << ctz_hwi (vector_sizes);
|
||||
int ratio = current_vector_size / smallest_vec_size;
|
||||
int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo)
|
||||
- LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
|
||||
eiters = eiters % vf;
|
||||
|
||||
epilogue->nb_iterations_upper_bound = eiters - 1;
|
||||
|
||||
if (eiters < vf / ratio)
|
||||
epilogue = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (epilogue)
|
||||
{
|
||||
epilogue->force_vectorize = loop->force_vectorize;
|
||||
epilogue->safelen = loop->safelen;
|
||||
epilogue->dont_vectorize = false;
|
||||
|
||||
/* We may need to if-convert epilogue to vectorize it. */
|
||||
if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo))
|
||||
tree_if_conversion (epilogue);
|
||||
}
|
||||
|
||||
return epilogue;
|
||||
}
|
||||
|
||||
/* The code below is trying to perform simple optimization - revert
|
||||
|
|
|
@ -514,6 +514,7 @@ vectorize_loops (void)
|
|||
hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL;
|
||||
bool any_ifcvt_loops = false;
|
||||
unsigned ret = 0;
|
||||
struct loop *new_loop;
|
||||
|
||||
vect_loops_num = number_of_loops (cfun);
|
||||
|
||||
|
@ -538,7 +539,8 @@ vectorize_loops (void)
|
|||
&& optimize_loop_nest_for_speed_p (loop))
|
||||
|| loop->force_vectorize)
|
||||
{
|
||||
loop_vec_info loop_vinfo;
|
||||
loop_vec_info loop_vinfo, orig_loop_vinfo = NULL;
|
||||
vectorize_epilogue:
|
||||
vect_location = find_loop_location (loop);
|
||||
if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
|
||||
&& dump_enabled_p ())
|
||||
|
@ -546,7 +548,7 @@ vectorize_loops (void)
|
|||
LOCATION_FILE (vect_location),
|
||||
LOCATION_LINE (vect_location));
|
||||
|
||||
loop_vinfo = vect_analyze_loop (loop);
|
||||
loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo);
|
||||
loop->aux = loop_vinfo;
|
||||
|
||||
if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
|
||||
|
@ -580,7 +582,7 @@ vectorize_loops (void)
|
|||
&& dump_enabled_p ())
|
||||
dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
|
||||
"loop vectorized\n");
|
||||
vect_transform_loop (loop_vinfo);
|
||||
new_loop = vect_transform_loop (loop_vinfo);
|
||||
num_vectorized_loops++;
|
||||
/* Now that the loop has been vectorized, allow it to be unrolled
|
||||
etc. */
|
||||
|
@ -602,6 +604,15 @@ vectorize_loops (void)
|
|||
fold_loop_vectorized_call (loop_vectorized_call, boolean_true_node);
|
||||
ret |= TODO_cleanup_cfg;
|
||||
}
|
||||
|
||||
if (new_loop)
|
||||
{
|
||||
/* Epilogue of vectorized loop must be vectorized too. */
|
||||
vect_loops_num = number_of_loops (cfun);
|
||||
loop = new_loop;
|
||||
orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */
|
||||
goto vectorize_epilogue;
|
||||
}
|
||||
}
|
||||
|
||||
vect_location = UNKNOWN_LOCATION;
|
||||
|
|
|
@ -335,6 +335,10 @@ typedef struct _loop_vec_info : public vec_info {
|
|||
/* Mark loops having masked stores. */
|
||||
bool has_mask_store;
|
||||
|
||||
/* For loops being epilogues of already vectorized loops
|
||||
this points to the original vectorized loop. Otherwise NULL. */
|
||||
_loop_vec_info *orig_loop_info;
|
||||
|
||||
} *loop_vec_info;
|
||||
|
||||
/* Access Functions. */
|
||||
|
@ -374,6 +378,7 @@ typedef struct _loop_vec_info : public vec_info {
|
|||
#define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store
|
||||
#define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
|
||||
#define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost
|
||||
#define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info
|
||||
|
||||
#define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
|
||||
((L)->may_misalign_stmts.length () > 0)
|
||||
|
@ -389,6 +394,12 @@ typedef struct _loop_vec_info : public vec_info {
|
|||
#define LOOP_VINFO_NITERS_KNOWN_P(L) \
|
||||
(tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0)
|
||||
|
||||
#define LOOP_VINFO_EPILOGUE_P(L) \
|
||||
(LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)
|
||||
|
||||
#define LOOP_VINFO_ORIG_VECT_FACTOR(L) \
|
||||
(LOOP_VINFO_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))
|
||||
|
||||
static inline loop_vec_info
|
||||
loop_vec_info_for_loop (struct loop *loop)
|
||||
{
|
||||
|
@ -1032,8 +1043,8 @@ extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
|
|||
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
|
||||
struct loop *, edge);
|
||||
extern void vect_loop_versioning (loop_vec_info, unsigned int, bool);
|
||||
extern void vect_do_peeling (loop_vec_info, tree, tree,
|
||||
tree *, int, bool, bool);
|
||||
extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
|
||||
tree *, int, bool, bool);
|
||||
extern source_location find_loop_location (struct loop *);
|
||||
extern bool vect_can_advance_ivs_p (loop_vec_info);
|
||||
|
||||
|
@ -1144,11 +1155,11 @@ extern void destroy_loop_vec_info (loop_vec_info, bool);
|
|||
extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool,
|
||||
bool *, bool);
|
||||
/* Drive for loop analysis stage. */
|
||||
extern loop_vec_info vect_analyze_loop (struct loop *);
|
||||
extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
|
||||
extern tree vect_build_loop_niters (loop_vec_info);
|
||||
extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool);
|
||||
/* Drive for loop transformation stage. */
|
||||
extern void vect_transform_loop (loop_vec_info);
|
||||
extern struct loop *vect_transform_loop (loop_vec_info);
|
||||
extern loop_vec_info vect_analyze_loop_form (struct loop *);
|
||||
extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *,
|
||||
slp_tree, int, gimple **);
|
||||
|
|
Loading…
Add table
Reference in a new issue