New pass: loop flattening.
2010-09-09 Sebastian Pop <sebastian.pop@amd.com> * Makefile.in (OBJS-common): Add graphite-flattening.o. (graphite-flattening.o): New rule. * common.opt (floop-flatten): New flag. * doc/invoke.texi (-floop-flatten): Documented. * graphite-flattening.c: New. * graphite-poly.c (apply_poly_transforms): Call flatten_all_loops. * graphite-poly.h (flatten_all_loops): Declared. (lst_remove_loop_and_inline_stmts_in_loop_father): New. * tree-ssa-loop.c (gate_graphite_transforms): When flag_loop_flatten is set, also set flag_graphite. From-SVN: r164804
This commit is contained in:
parent
c498b9b997
commit
98af4c9ffe
9 changed files with 525 additions and 7 deletions
|
@ -1,3 +1,16 @@
|
|||
2010-09-30 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* Makefile.in (OBJS-common): Add graphite-flattening.o.
|
||||
(graphite-flattening.o): New rule.
|
||||
* common.opt (floop-flatten): New flag.
|
||||
* doc/invoke.texi (-floop-flatten): Documented.
|
||||
* graphite-flattening.c: New.
|
||||
* graphite-poly.c (apply_poly_transforms): Call flatten_all_loops.
|
||||
* graphite-poly.h (flatten_all_loops): Declared.
|
||||
(lst_remove_loop_and_inline_stmts_in_loop_father): New.
|
||||
* tree-ssa-loop.c (gate_graphite_transforms): When flag_loop_flatten
|
||||
is set, also set flag_graphite.
|
||||
|
||||
2010-09-30 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* graphite-poly.c (cloog_checksum): New.
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
2010-09-09 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* Makefile.in (OBJS-common): Add graphite-flattening.o.
|
||||
(graphite-flattening.o): New rule.
|
||||
* common.opt (floop-flatten): New flag.
|
||||
* doc/invoke.texi (-floop-flatten): Documented.
|
||||
* graphite-flattening.c: New.
|
||||
* graphite-poly.c (apply_poly_transforms): Call flatten_all_loops.
|
||||
* graphite-poly.h (flatten_all_loops): Declared.
|
||||
(lst_remove_loop_and_inline_stmts_in_loop_father): New.
|
||||
* tree-ssa-loop.c (gate_graphite_transforms): When flag_loop_flatten
|
||||
is set, also set flag_graphite.
|
||||
|
||||
2010-09-09 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* graphite-poly.c (cloog_checksum): New.
|
||||
|
|
|
@ -1244,6 +1244,7 @@ OBJS-common = \
|
|||
graphite-clast-to-gimple.o \
|
||||
graphite-cloog-util.o \
|
||||
graphite-dependences.o \
|
||||
graphite-flattening.o \
|
||||
graphite-interchange.o \
|
||||
graphite-poly.o \
|
||||
graphite-ppl.o \
|
||||
|
@ -2695,6 +2696,12 @@ graphite-dependences.o: graphite-dependences.c $(CONFIG_H) $(SYSTEM_H) \
|
|||
$(TOPLEV_H) $(DIAGNOSTIC_CORE_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
|
||||
$(GIMPLE_H) $(TREE_DATA_REF_H) tree-pass.h domwalk.h \
|
||||
graphite.h graphite-poly.h graphite-ppl.h graphite-dependences.h
|
||||
graphite-flattening.o: graphite-flattening.c $(CONFIG_H) $(SYSTEM_H) \
|
||||
coretypes.h $(TM_H) $(GGC_H) $(TREE_H) $(RTL_H) output.h \
|
||||
$(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TOPLEV_H) $(TREE_FLOW_H) \
|
||||
$(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(GIMPLE_H) \
|
||||
$(TREE_DATA_REF_H) tree-pass.h domwalk.h value-prof.h graphite.h \
|
||||
graphite-poly.h graphite-ppl.h
|
||||
graphite-interchange.o: graphite-interchange.c $(CONFIG_H) $(SYSTEM_H) \
|
||||
coretypes.h \
|
||||
$(TM_H) $(GGC_H) $(TREE_H) $(RTL_H) output.h $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
|
||||
|
|
|
@ -870,6 +870,10 @@ floop-block
|
|||
Common Report Var(flag_loop_block) Optimization
|
||||
Enable Loop Blocking transformation
|
||||
|
||||
floop-flatten
|
||||
Common Report Var(flag_loop_flatten) Optimization
|
||||
Enable Loop Flattening transformation
|
||||
|
||||
fstrict-volatile-bitfields
|
||||
Common Report Var(flag_strict_volatile_bitfields) Init(-1)
|
||||
Force bitfield accesses to match their type width
|
||||
|
|
|
@ -352,7 +352,7 @@ Objective-C and Objective-C++ Dialects}.
|
|||
-fira-loop-pressure -fno-ira-share-save-slots @gol
|
||||
-fno-ira-share-spill-slots -fira-verbose=@var{n} @gol
|
||||
-fivopts -fkeep-inline-functions -fkeep-static-consts @gol
|
||||
-floop-block -floop-interchange -floop-strip-mine @gol
|
||||
-floop-block -floop-flatten -floop-interchange -floop-strip-mine @gol
|
||||
-floop-parallelize-all -flto -flto-compression-level -flto-report @gol
|
||||
-fltrans -fltrans-output-list -fmerge-all-constants -fmerge-constants @gol
|
||||
-fmodulo-sched -fmodulo-sched-allow-regmoves -fmove-loop-invariants @gol
|
||||
|
@ -6798,6 +6798,7 @@ Perform linear loop transformations on tree. This flag can improve cache
|
|||
performance and allow further loop optimizations to take place.
|
||||
|
||||
@item -floop-interchange
|
||||
@opindex floop-interchange
|
||||
Perform loop interchange transformations on loops. Interchanging two
|
||||
nested loops switches the inner and outer loops. For example, given a
|
||||
loop like:
|
||||
|
@ -6826,6 +6827,7 @@ with @option{--with-ppl} and @option{--with-cloog} to enable the
|
|||
Graphite loop transformation infrastructure.
|
||||
|
||||
@item -floop-strip-mine
|
||||
@opindex floop-strip-mine
|
||||
Perform loop strip mining transformations on loops. Strip mining
|
||||
splits a loop into two nested loops. The outer loop has strides
|
||||
equal to the strip size and the inner loop has strides of the
|
||||
|
@ -6851,6 +6853,7 @@ be configured with @option{--with-ppl} and @option{--with-cloog} to
|
|||
enable the Graphite loop transformation infrastructure.
|
||||
|
||||
@item -floop-block
|
||||
@opindex floop-block
|
||||
Perform loop blocking transformations on loops. Blocking strip mines
|
||||
each loop in the loop nest such that the memory accesses of the
|
||||
element loops fit inside caches. The strip length can be changed
|
||||
|
@ -6892,7 +6895,14 @@ GIMPLE -> GRAPHITE -> GIMPLE transformation. Some minimal optimizations
|
|||
are also performed by the code generator CLooG, like index splitting and
|
||||
dead code elimination in loops.
|
||||
|
||||
@item -floop-flatten
|
||||
@opindex floop-flatten
|
||||
Removes the loop nesting structure: transforms the loop nest into a
|
||||
single loop. This transformation can be useful to vectorize all the
|
||||
levels of the loop nest.
|
||||
|
||||
@item -floop-parallelize-all
|
||||
@opindex floop-parallelize-all
|
||||
Use the Graphite data dependence analysis to identify loops that can
|
||||
be parallelized. Parallelize all the loops that can be analyzed to
|
||||
not contain loop carried dependences without checking that it is
|
||||
|
|
442
gcc/graphite-flattening.c
Normal file
442
gcc/graphite-flattening.c
Normal file
|
@ -0,0 +1,442 @@
|
|||
/* Loop flattening for Graphite.
|
||||
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||
Contributed by Sebastian Pop <sebastian.pop@amd.com>.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "ggc.h"
|
||||
#include "tree.h"
|
||||
#include "rtl.h"
|
||||
#include "output.h"
|
||||
#include "basic-block.h"
|
||||
#include "diagnostic.h"
|
||||
#include "tree-flow.h"
|
||||
#include "toplev.h"
|
||||
#include "tree-dump.h"
|
||||
#include "timevar.h"
|
||||
#include "cfgloop.h"
|
||||
#include "tree-chrec.h"
|
||||
#include "tree-data-ref.h"
|
||||
#include "tree-scalar-evolution.h"
|
||||
#include "tree-pass.h"
|
||||
#include "domwalk.h"
|
||||
#include "value-prof.h"
|
||||
#include "pointer-set.h"
|
||||
#include "gimple.h"
|
||||
#include "params.h"
|
||||
|
||||
#ifdef HAVE_cloog
|
||||
#include "ppl_c.h"
|
||||
#include "sese.h"
|
||||
#include "graphite-ppl.h"
|
||||
#include "graphite.h"
|
||||
#include "graphite-poly.h"
|
||||
|
||||
/* The loop flattening pass transforms loop nests into a single loop,
|
||||
removing the loop nesting structure. The auto-vectorization can
|
||||
then apply on the full loop body, without needing the outer-loop
|
||||
vectorization.
|
||||
|
||||
The canonical example is as follows: suppose that we have a loop
|
||||
nest with known iteration counts
|
||||
|
||||
| for (i = 1; i <= 6; i++)
|
||||
| for (j = 1; j <= 6; j++)
|
||||
| S1(i,j);
|
||||
|
||||
The loop flattening is performed by linearizing the iteration space
|
||||
using the function "f (x) = 6 * i + j". In this case, CLooG would
|
||||
produce this code:
|
||||
|
||||
| for (c1=7;c1<=42;c1++) {
|
||||
| i = floord(c1-1,6);
|
||||
| S1(i,c1-6*i);
|
||||
| }
|
||||
|
||||
There are several limitations for loop flattening that are linked
|
||||
to the expressivity of the polyhedral model. One has to take an
|
||||
upper bound approximation to deal with the parametric case of loop
|
||||
flattening. For example, in the loop nest:
|
||||
|
||||
| for (i = 1; i <= N; i++)
|
||||
| for (j = 1; j <= M; j++)
|
||||
| S1(i,j);
|
||||
|
||||
One would like to flatten this loop using a linearization function
|
||||
like this "f (x) = M * i + j". However CLooG's schedules are not
|
||||
expressive enough to deal with this case, and so the parameter M
|
||||
has to be replaced by an integer upper bound approximation. If we
|
||||
further know in the context of the scop that "M <= 6", then it is
|
||||
possible to linearize the loop with "f (x) = 6 * i + j". In this
|
||||
case, CLooG would produce this code:
|
||||
|
||||
| for (c1=7;c1<=6*M+N;c1++) {
|
||||
| i = ceild(c1-N,6);
|
||||
| if (i <= floord(c1-1,6)) {
|
||||
| S1(i,c1-6*i);
|
||||
| }
|
||||
| }
|
||||
|
||||
For arbitrarily complex loop nests, the algorithm proceeds in two
|
||||
steps. First, the LST is flattened by removing the loops structure
|
||||
and by inserting the statements in the order they appear in
|
||||
depth-first order. Then, the scattering of each statement is
|
||||
transformed accordingly.
|
||||
|
||||
Supposing that the original program is represented by the following
|
||||
LST:
|
||||
|
||||
| (loop_1
|
||||
| stmt_1
|
||||
| (loop_2 stmt_3
|
||||
| (loop_3 stmt_4)
|
||||
| (loop_4 stmt_5 stmt_6)
|
||||
| stmt_7
|
||||
| )
|
||||
| stmt_2
|
||||
| )
|
||||
|
||||
Loop flattening traverses the LST in depth-first order, and
|
||||
flattens pairs of loops successively by projecting the inner loops
|
||||
in the iteration domain of the outer loops:
|
||||
|
||||
lst_project_loop (loop_2, loop_3, stride)
|
||||
|
||||
| (loop_1
|
||||
| stmt_1
|
||||
| (loop_2 stmt_3 stmt_4
|
||||
| (loop_4 stmt_5 stmt_6)
|
||||
| stmt_7
|
||||
| )
|
||||
| stmt_2
|
||||
| )
|
||||
|
||||
lst_project_loop (loop_2, loop_4, stride)
|
||||
|
||||
| (loop_1
|
||||
| stmt_1
|
||||
| (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
|
||||
| stmt_2
|
||||
| )
|
||||
|
||||
lst_project_loop (loop_1, loop_2, stride)
|
||||
|
||||
| (loop_1
|
||||
| stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
|
||||
| )
|
||||
|
||||
At each step, the iteration domain of the outer loop is enlarged to
|
||||
contain enough points to iterate over the inner loop domain. */
|
||||
|
||||
/* Initializes RES to the number of iterations of the linearized loop
|
||||
LST. RES is the cardinal of the iteration domain of LST. */
|
||||
|
||||
static void
|
||||
lst_linearized_niter (lst_p lst, mpz_t res)
|
||||
{
|
||||
int i;
|
||||
lst_p l;
|
||||
mpz_t n;
|
||||
|
||||
mpz_init (n);
|
||||
mpz_set_si (res, 0);
|
||||
|
||||
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
|
||||
if (LST_LOOP_P (l))
|
||||
{
|
||||
lst_linearized_niter (l, n);
|
||||
mpz_add (res, res, n);
|
||||
}
|
||||
|
||||
if (LST_LOOP_P (lst))
|
||||
{
|
||||
lst_niter_for_loop (lst, n);
|
||||
|
||||
if (mpz_cmp_si (res, 0) != 0)
|
||||
mpz_mul (res, res, n);
|
||||
else
|
||||
mpz_set (res, n);
|
||||
}
|
||||
|
||||
mpz_clear (n);
|
||||
}
|
||||
|
||||
/* Applies the translation "f (x) = x + OFFSET" to the loop containing
|
||||
STMT. */
|
||||
|
||||
static void
|
||||
lst_offset (lst_p stmt, mpz_t offset)
|
||||
{
|
||||
lst_p inner = LST_LOOP_FATHER (stmt);
|
||||
poly_bb_p pbb = LST_PBB (stmt);
|
||||
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
|
||||
int inner_depth = lst_depth (inner);
|
||||
ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
|
||||
ppl_Linear_Expression_t expr;
|
||||
ppl_dimension_type dim;
|
||||
ppl_Coefficient_t one;
|
||||
mpz_t x;
|
||||
|
||||
mpz_init (x);
|
||||
mpz_set_si (x, 1);
|
||||
ppl_new_Coefficient (&one);
|
||||
ppl_assign_Coefficient_from_mpz_t (one, x);
|
||||
|
||||
ppl_Polyhedron_space_dimension (poly, &dim);
|
||||
ppl_new_Linear_Expression_with_dimension (&expr, dim);
|
||||
|
||||
ppl_set_coef (expr, inner_dim, 1);
|
||||
ppl_set_inhomogeneous_gmp (expr, offset);
|
||||
ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
|
||||
ppl_delete_Linear_Expression (expr);
|
||||
ppl_delete_Coefficient (one);
|
||||
}
|
||||
|
||||
/* Scale by FACTOR the loop LST containing STMT. */
|
||||
|
||||
static void
|
||||
lst_scale (lst_p lst, lst_p stmt, mpz_t factor)
|
||||
{
|
||||
mpz_t x;
|
||||
ppl_Coefficient_t one;
|
||||
int outer_depth = lst_depth (lst);
|
||||
poly_bb_p pbb = LST_PBB (stmt);
|
||||
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
|
||||
ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
|
||||
ppl_Linear_Expression_t expr;
|
||||
ppl_dimension_type dim;
|
||||
|
||||
mpz_init (x);
|
||||
mpz_set_si (x, 1);
|
||||
ppl_new_Coefficient (&one);
|
||||
ppl_assign_Coefficient_from_mpz_t (one, x);
|
||||
|
||||
ppl_Polyhedron_space_dimension (poly, &dim);
|
||||
ppl_new_Linear_Expression_with_dimension (&expr, dim);
|
||||
|
||||
/* outer_dim = factor * outer_dim. */
|
||||
ppl_set_coef_gmp (expr, outer_dim, factor);
|
||||
ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
|
||||
ppl_delete_Linear_Expression (expr);
|
||||
|
||||
mpz_clear (x);
|
||||
ppl_delete_Coefficient (one);
|
||||
}
|
||||
|
||||
/* Project the INNER loop into the iteration domain of the OUTER loop.
|
||||
STRIDE is the number of iterations between two iterations of the
|
||||
outer loop. */
|
||||
|
||||
static void
|
||||
lst_project_loop (lst_p outer, lst_p inner, mpz_t stride)
|
||||
{
|
||||
int i;
|
||||
lst_p stmt;
|
||||
mpz_t x;
|
||||
ppl_Coefficient_t one;
|
||||
int outer_depth = lst_depth (outer);
|
||||
int inner_depth = lst_depth (inner);
|
||||
|
||||
mpz_init (x);
|
||||
mpz_set_si (x, 1);
|
||||
ppl_new_Coefficient (&one);
|
||||
ppl_assign_Coefficient_from_mpz_t (one, x);
|
||||
|
||||
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt)
|
||||
{
|
||||
poly_bb_p pbb = LST_PBB (stmt);
|
||||
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
|
||||
ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
|
||||
ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
|
||||
ppl_Linear_Expression_t expr;
|
||||
ppl_dimension_type dim;
|
||||
ppl_dimension_type *ds;
|
||||
|
||||
/* There should be no loops under INNER. */
|
||||
gcc_assert (!LST_LOOP_P (stmt));
|
||||
ppl_Polyhedron_space_dimension (poly, &dim);
|
||||
ppl_new_Linear_Expression_with_dimension (&expr, dim);
|
||||
|
||||
/* outer_dim = outer_dim * stride + inner_dim. */
|
||||
ppl_set_coef (expr, inner_dim, 1);
|
||||
ppl_set_coef_gmp (expr, outer_dim, stride);
|
||||
ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
|
||||
ppl_delete_Linear_Expression (expr);
|
||||
|
||||
/* Project on inner_dim. */
|
||||
ppl_new_Linear_Expression_with_dimension (&expr, dim - 1);
|
||||
ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
|
||||
ppl_delete_Linear_Expression (expr);
|
||||
|
||||
/* Remove inner loop and the static schedule of its body. */
|
||||
ds = XNEWVEC (ppl_dimension_type, 2);
|
||||
ds[0] = inner_dim;
|
||||
ds[1] = inner_dim + 1;
|
||||
ppl_Polyhedron_remove_space_dimensions (poly, ds, 2);
|
||||
PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2;
|
||||
free (ds);
|
||||
}
|
||||
|
||||
mpz_clear (x);
|
||||
ppl_delete_Coefficient (one);
|
||||
}
|
||||
|
||||
/* Flattens the loop nest LST. Return true when something changed.
|
||||
OFFSET is used to compute the number of iterations of the outermost
|
||||
loop before the current LST is executed. */
|
||||
|
||||
static bool
|
||||
lst_flatten_loop (lst_p lst, mpz_t init_offset)
|
||||
{
|
||||
int i;
|
||||
lst_p l;
|
||||
bool res = false;
|
||||
mpz_t n, one, offset, stride;
|
||||
|
||||
mpz_init (n);
|
||||
mpz_init (one);
|
||||
mpz_init (offset);
|
||||
mpz_init (stride);
|
||||
mpz_set (offset, init_offset);
|
||||
mpz_set_si (one, 1);
|
||||
|
||||
lst_linearized_niter (lst, stride);
|
||||
lst_niter_for_loop (lst, n);
|
||||
mpz_tdiv_q (stride, stride, n);
|
||||
|
||||
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
|
||||
if (LST_LOOP_P (l))
|
||||
{
|
||||
res = true;
|
||||
|
||||
lst_flatten_loop (l, offset);
|
||||
lst_niter_for_loop (l, n);
|
||||
|
||||
lst_project_loop (lst, l, stride);
|
||||
|
||||
/* The offset is the number of iterations minus 1, as we want
|
||||
to execute the next statements at the same iteration as the
|
||||
last iteration of the loop. */
|
||||
mpz_sub (n, n, one);
|
||||
mpz_add (offset, offset, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
lst_scale (lst, l, stride);
|
||||
if (mpz_cmp_si (offset, 0) != 0)
|
||||
lst_offset (l, offset);
|
||||
}
|
||||
|
||||
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
|
||||
if (LST_LOOP_P (l))
|
||||
lst_remove_loop_and_inline_stmts_in_loop_father (l);
|
||||
|
||||
mpz_clear (n);
|
||||
mpz_clear (one);
|
||||
mpz_clear (offset);
|
||||
mpz_clear (stride);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Remove all but the first 3 dimensions of the scattering:
|
||||
- dim0: the static schedule for the loop
|
||||
- dim1: the dynamic schedule of the loop
|
||||
- dim2: the static schedule for the loop body. */
|
||||
|
||||
static void
|
||||
remove_unused_scattering_dimensions (lst_p lst)
|
||||
{
|
||||
int i;
|
||||
lst_p stmt;
|
||||
mpz_t x;
|
||||
ppl_Coefficient_t one;
|
||||
|
||||
mpz_init (x);
|
||||
mpz_set_si (x, 1);
|
||||
ppl_new_Coefficient (&one);
|
||||
ppl_assign_Coefficient_from_mpz_t (one, x);
|
||||
|
||||
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt)
|
||||
{
|
||||
poly_bb_p pbb = LST_PBB (stmt);
|
||||
ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
|
||||
int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3;
|
||||
ppl_dimension_type *ds;
|
||||
|
||||
/* There should be no loops inside LST after flattening. */
|
||||
gcc_assert (!LST_LOOP_P (stmt));
|
||||
|
||||
if (!nb_dims_to_remove)
|
||||
continue;
|
||||
|
||||
ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove);
|
||||
for (j = 0; j < nb_dims_to_remove; j++)
|
||||
ds[j] = j + 3;
|
||||
|
||||
ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove);
|
||||
PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove;
|
||||
free (ds);
|
||||
}
|
||||
|
||||
mpz_clear (x);
|
||||
ppl_delete_Coefficient (one);
|
||||
}
|
||||
|
||||
/* Flattens all the loop nests of LST. Return true when something
|
||||
changed. */
|
||||
|
||||
static bool
|
||||
lst_do_flatten (lst_p lst)
|
||||
{
|
||||
int i;
|
||||
lst_p l;
|
||||
bool res = false;
|
||||
mpz_t zero;
|
||||
|
||||
if (!lst
|
||||
|| !LST_LOOP_P (lst))
|
||||
return false;
|
||||
|
||||
mpz_init (zero);
|
||||
mpz_set_si (zero, 0);
|
||||
|
||||
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
|
||||
if (LST_LOOP_P (l))
|
||||
{
|
||||
res |= lst_flatten_loop (l, zero);
|
||||
remove_unused_scattering_dimensions (l);
|
||||
}
|
||||
|
||||
lst_update_scattering (lst);
|
||||
mpz_clear (zero);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Flatten all the loop nests in SCOP. Returns true when something
|
||||
changed. */
|
||||
|
||||
bool
|
||||
flatten_all_loops (scop_p scop)
|
||||
{
|
||||
return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop));
|
||||
}
|
||||
|
||||
#endif
|
|
@ -783,6 +783,9 @@ apply_poly_transforms (scop_p scop)
|
|||
transform_done |= scop_do_interchange (scop);
|
||||
}
|
||||
|
||||
if (flag_loop_flatten)
|
||||
transform_done |= flatten_all_loops (scop);
|
||||
|
||||
/* This feature is only enabled in the Graphite branch. */
|
||||
if (0)
|
||||
{
|
||||
|
@ -1688,7 +1691,8 @@ pbb_number_of_iterations_at_time (poly_bb_p pbb,
|
|||
ppl_delete_Constraint_System (cs);
|
||||
}
|
||||
|
||||
/* Compute the lower bound on the original iteration domain. */
|
||||
/* Compute the lower bound on the original iteration domain and add
|
||||
it to the scattering. */
|
||||
ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron
|
||||
(&sctr_lb, PBB_TRANSFORMED_SCATTERING (pbb));
|
||||
for (i = 0; i < (int) domain_dim; i++)
|
||||
|
|
|
@ -414,6 +414,7 @@ extern void debug_iteration_domains (scop_p, int);
|
|||
extern bool scop_do_interchange (scop_p);
|
||||
extern bool scop_do_strip_mine (scop_p);
|
||||
extern bool scop_do_block (scop_p);
|
||||
extern bool flatten_all_loops (scop_p);
|
||||
extern void pbb_number_of_iterations_at_time (poly_bb_p, graphite_dim_t, mpz_t);
|
||||
extern void pbb_remove_duplicate_pdrs (poly_bb_p);
|
||||
|
||||
|
@ -944,7 +945,7 @@ find_lst_loop (lst_p stmt, int loop_depth)
|
|||
return loop;
|
||||
}
|
||||
|
||||
/* Return the first lst representing a PBB statement in LST. */
|
||||
/* Return the first LST representing a PBB statement in LST. */
|
||||
|
||||
static inline lst_p
|
||||
lst_find_first_pbb (lst_p lst)
|
||||
|
@ -968,7 +969,7 @@ lst_find_first_pbb (lst_p lst)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* Returns true when LST is a loop that does not contains
|
||||
/* Returns true when LST is a loop that does not contain
|
||||
statements. */
|
||||
|
||||
static inline bool
|
||||
|
@ -977,7 +978,7 @@ lst_empty_p (lst_p lst)
|
|||
return !lst_find_first_pbb (lst);
|
||||
}
|
||||
|
||||
/* Return the last lst representing a PBB statement in LST. */
|
||||
/* Return the last LST representing a PBB statement in LST. */
|
||||
|
||||
static inline lst_p
|
||||
lst_find_last_pbb (lst_p lst)
|
||||
|
@ -1061,6 +1062,26 @@ lst_remove_from_sequence (lst_p lst)
|
|||
LST_LOOP_FATHER (lst) = NULL;
|
||||
}
|
||||
|
||||
/* Removes the loop LST and inline its body in the father loop. */
|
||||
|
||||
static inline void
|
||||
lst_remove_loop_and_inline_stmts_in_loop_father (lst_p lst)
|
||||
{
|
||||
lst_p l, father = LST_LOOP_FATHER (lst);
|
||||
int i, dewey = lst_dewey_number (lst);
|
||||
|
||||
gcc_assert (lst && father && dewey >= 0);
|
||||
|
||||
VEC_ordered_remove (lst_p, LST_SEQ (father), dewey);
|
||||
LST_LOOP_FATHER (lst) = NULL;
|
||||
|
||||
FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
|
||||
{
|
||||
VEC_safe_insert (lst_p, heap, LST_SEQ (father), dewey + i, l);
|
||||
LST_LOOP_FATHER (l) = father;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sets NITER to the upper bound approximation of the number of
|
||||
iterations of loop LST. */
|
||||
|
||||
|
|
|
@ -303,8 +303,12 @@ gate_graphite_transforms (void)
|
|||
{
|
||||
/* Enable -fgraphite pass if any one of the graphite optimization flags
|
||||
is turned on. */
|
||||
if (flag_loop_block || flag_loop_interchange || flag_loop_strip_mine
|
||||
|| flag_graphite_identity || flag_loop_parallelize_all)
|
||||
if (flag_loop_block
|
||||
|| flag_loop_interchange
|
||||
|| flag_loop_strip_mine
|
||||
|| flag_graphite_identity
|
||||
|| flag_loop_parallelize_all
|
||||
|| flag_loop_flatten)
|
||||
flag_graphite = 1;
|
||||
|
||||
return flag_graphite != 0;
|
||||
|
|
Loading…
Add table
Reference in a new issue