tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file.
* tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file. (tree-ssa-loop-ivopts.h): New header file. (struct builtin_info): New fields. (classify_builtin_1): Compute and record base and offset parts for memset builtin partition by calling strip_offset. (offset_cmp, fuse_memset_builtins): New functions. (finalize_partitions): Fuse adjacent memset partitions by calling above function. * tree-ssa-loop-ivopts.c (strip_offset): Delete static declaration. Expose the interface. * tree-ssa-loop-ivopts.h (strip_offset): New declaration. * gcc.dg/tree-ssa/ldist-17.c: Adjust test string. * gcc.dg/tree-ssa/ldist-32.c: New test. * gcc.dg/tree-ssa/ldist-35.c: New test. * gcc.dg/tree-ssa/ldist-36.c: New test. From-SVN: r253859
This commit is contained in:
parent
85aa9ed64b
commit
957f0d8faf
9 changed files with 232 additions and 7 deletions
|
@ -1,3 +1,17 @@
|
|||
2017-10-18 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
* tree-loop-distribution.c (INCLUDE_ALGORITHM): New header file.
|
||||
(tree-ssa-loop-ivopts.h): New header file.
|
||||
(struct builtin_info): New fields.
|
||||
(classify_builtin_1): Compute and record base and offset parts for
|
||||
memset builtin partition by calling strip_offset.
|
||||
(offset_cmp, fuse_memset_builtins): New functions.
|
||||
(finalize_partitions): Fuse adjacent memset partitions by calling
|
||||
above function.
|
||||
* tree-ssa-loop-ivopts.c (strip_offset): Delete static declaration.
|
||||
Expose the interface.
|
||||
* tree-ssa-loop-ivopts.h (strip_offset): New declaration.
|
||||
|
||||
2017-10-18 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/82574
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2017-10-18 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
* gcc.dg/tree-ssa/ldist-17.c: Adjust test string.
|
||||
* gcc.dg/tree-ssa/ldist-32.c: New test.
|
||||
* gcc.dg/tree-ssa/ldist-35.c: New test.
|
||||
* gcc.dg/tree-ssa/ldist-36.c: New test.
|
||||
|
||||
2017-10-18 Bin Cheng <bin.cheng@arm.com>
|
||||
|
||||
PR tree-optimization/82574
|
||||
|
|
|
@ -45,5 +45,5 @@ mad_synth_mute (struct mad_synth *synth)
|
|||
return;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump "distributed: split to 0 loops and 4 library calls" "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump-times "generated memset zero" 4 "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump "Loop nest . distributed: split to 0 loops and 1 library calls" "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump-times "generated memset zero" 1 "ldist" } } */
|
||||
|
|
29
gcc/testsuite/gcc.dg/tree-ssa/ldist-32.c
Normal file
29
gcc/testsuite/gcc.dg/tree-ssa/ldist-32.c
Normal file
|
@ -0,0 +1,29 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
|
||||
|
||||
#define M (256)
|
||||
#define N (512)
|
||||
|
||||
struct st
|
||||
{
|
||||
int a[M][N];
|
||||
int c[M];
|
||||
int b[M][N];
|
||||
};
|
||||
|
||||
void
|
||||
foo (struct st *p)
|
||||
{
|
||||
for (unsigned i = 0; i < M; ++i)
|
||||
{
|
||||
p->c[i] = 0;
|
||||
for (unsigned j = N; j > 0; --j)
|
||||
{
|
||||
p->a[i][j - 1] = 0;
|
||||
p->b[i][j - 1] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 1 library" 1 "ldist" } } */
|
||||
/* { dg-final { scan-tree-dump-times "__builtin_memset \\(.*, 0, 1049600\\);" 1 "ldist" } } */
|
28
gcc/testsuite/gcc.dg/tree-ssa/ldist-35.c
Normal file
28
gcc/testsuite/gcc.dg/tree-ssa/ldist-35.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
|
||||
|
||||
#define M (256)
|
||||
#define N (512)
|
||||
|
||||
struct st
|
||||
{
|
||||
int a[M][N];
|
||||
int c[M];
|
||||
int b[M][N];
|
||||
};
|
||||
|
||||
void
|
||||
foo (struct st * restrict p, struct st * restrict q)
|
||||
{
|
||||
for (unsigned i = 0; i < M; ++i)
|
||||
{
|
||||
p->c[i] = 0;
|
||||
for (unsigned j = N; j > 0; --j)
|
||||
{
|
||||
p->a[i][j - 1] = q->a[i][j - 1];
|
||||
p->b[i][j - 1] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 1 library" 1 "ldist" { xfail *-*-* } } } */
|
28
gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c
Normal file
28
gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c
Normal file
|
@ -0,0 +1,28 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
|
||||
|
||||
#define M (256)
|
||||
#define N (512)
|
||||
|
||||
struct st
|
||||
{
|
||||
int a[M][N];
|
||||
int c[M];
|
||||
int b[M][N];
|
||||
};
|
||||
|
||||
void
|
||||
foo (struct st * restrict p)
|
||||
{
|
||||
for (unsigned i = 0; i < M; ++i)
|
||||
{
|
||||
p->c[i] = 0;
|
||||
for (unsigned j = N; j > 0; --j)
|
||||
{
|
||||
p->b[i][j - 1] = p->a[i][j - 1];
|
||||
p->a[i][j - 1] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 3 library" 1 "ldist" } } */
|
|
@ -90,6 +90,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
data reuse. */
|
||||
|
||||
#include "config.h"
|
||||
#define INCLUDE_ALGORITHM /* stable_sort */
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "backend.h"
|
||||
|
@ -106,6 +107,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "stor-layout.h"
|
||||
#include "tree-cfg.h"
|
||||
#include "tree-ssa-loop-manip.h"
|
||||
#include "tree-ssa-loop-ivopts.h"
|
||||
#include "tree-ssa-loop.h"
|
||||
#include "tree-into-ssa.h"
|
||||
#include "tree-ssa.h"
|
||||
|
@ -604,6 +606,10 @@ struct builtin_info
|
|||
tree dst_base;
|
||||
tree src_base;
|
||||
tree size;
|
||||
/* Base and offset part of dst_base after stripping constant offset. This
|
||||
is only used in memset builtin distribution for now. */
|
||||
tree dst_base_base;
|
||||
unsigned HOST_WIDE_INT dst_base_offset;
|
||||
};
|
||||
|
||||
/* Partition for loop distribution. */
|
||||
|
@ -1504,7 +1510,11 @@ classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
|
|||
if (!compute_access_range (loop, dr, &base, &size))
|
||||
return;
|
||||
|
||||
partition->builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
|
||||
struct builtin_info *builtin;
|
||||
builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
|
||||
builtin->dst_base_base = strip_offset (builtin->dst_base,
|
||||
&builtin->dst_base_offset);
|
||||
partition->builtin = builtin;
|
||||
partition->kind = PKIND_MEMSET;
|
||||
}
|
||||
|
||||
|
@ -2480,6 +2490,113 @@ version_for_distribution_p (vec<struct partition *> *partitions,
|
|||
return (alias_ddrs->length () > 0);
|
||||
}
|
||||
|
||||
/* Compare base offset of builtin mem* partitions P1 and P2. */
|
||||
|
||||
static bool
|
||||
offset_cmp (struct partition *p1, struct partition *p2)
|
||||
{
|
||||
gcc_assert (p1 != NULL && p1->builtin != NULL);
|
||||
gcc_assert (p2 != NULL && p2->builtin != NULL);
|
||||
return p1->builtin->dst_base_offset < p2->builtin->dst_base_offset;
|
||||
}
|
||||
|
||||
/* Fuse adjacent memset builtin PARTITIONS if possible. This is a special
|
||||
case optimization transforming below code:
|
||||
|
||||
__builtin_memset (&obj, 0, 100);
|
||||
_1 = &obj + 100;
|
||||
__builtin_memset (_1, 0, 200);
|
||||
_2 = &obj + 300;
|
||||
__builtin_memset (_2, 0, 100);
|
||||
|
||||
into:
|
||||
|
||||
__builtin_memset (&obj, 0, 400);
|
||||
|
||||
Note we don't have dependence information between different partitions
|
||||
at this point, as a result, we can't handle nonadjacent memset builtin
|
||||
partitions since dependence might be broken. */
|
||||
|
||||
static void
|
||||
fuse_memset_builtins (vec<struct partition *> *partitions)
|
||||
{
|
||||
unsigned i, j;
|
||||
struct partition *part1, *part2;
|
||||
|
||||
for (i = 0; partitions->iterate (i, &part1);)
|
||||
{
|
||||
if (part1->kind != PKIND_MEMSET)
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Find sub-array of memset builtins of the same base. Index range
|
||||
of the sub-array is [i, j) with "j > i". */
|
||||
for (j = i + 1; partitions->iterate (j, &part2); ++j)
|
||||
{
|
||||
if (part2->kind != PKIND_MEMSET
|
||||
|| !operand_equal_p (part1->builtin->dst_base_base,
|
||||
part2->builtin->dst_base_base, 0))
|
||||
break;
|
||||
}
|
||||
|
||||
/* Stable sort is required in order to avoid breaking dependence. */
|
||||
std::stable_sort (&(*partitions)[i],
|
||||
&(*partitions)[i] + j - i, offset_cmp);
|
||||
/* Continue with next partition. */
|
||||
i = j;
|
||||
}
|
||||
|
||||
/* Merge all consecutive memset builtin partitions. */
|
||||
for (i = 0; i < partitions->length () - 1;)
|
||||
{
|
||||
part1 = (*partitions)[i];
|
||||
if (part1->kind != PKIND_MEMSET)
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
part2 = (*partitions)[i + 1];
|
||||
/* Only merge memset partitions of the same base and with constant
|
||||
access sizes. */
|
||||
if (part2->kind != PKIND_MEMSET
|
||||
|| TREE_CODE (part1->builtin->size) != INTEGER_CST
|
||||
|| TREE_CODE (part2->builtin->size) != INTEGER_CST
|
||||
|| !operand_equal_p (part1->builtin->dst_base_base,
|
||||
part2->builtin->dst_base_base, 0))
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
tree rhs1 = gimple_assign_rhs1 (DR_STMT (part1->builtin->dst_dr));
|
||||
tree rhs2 = gimple_assign_rhs1 (DR_STMT (part2->builtin->dst_dr));
|
||||
int bytev1 = const_with_all_bytes_same (rhs1);
|
||||
int bytev2 = const_with_all_bytes_same (rhs2);
|
||||
/* Only merge memset partitions of the same value. */
|
||||
if (bytev1 != bytev2 || bytev1 == -1)
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
wide_int end1 = wi::add (part1->builtin->dst_base_offset,
|
||||
wi::to_wide (part1->builtin->size));
|
||||
/* Only merge adjacent memset partitions. */
|
||||
if (wi::ne_p (end1, part2->builtin->dst_base_offset))
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
/* Merge partitions[i] and partitions[i+1]. */
|
||||
part1->builtin->size = fold_build2 (PLUS_EXPR, sizetype,
|
||||
part1->builtin->size,
|
||||
part2->builtin->size);
|
||||
partition_free (part2);
|
||||
partitions->ordered_remove (i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
|
||||
ALIAS_DDRS contains ddrs which need runtime alias check. */
|
||||
|
||||
|
@ -2523,6 +2640,10 @@ finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
|
|||
}
|
||||
partitions->truncate (1);
|
||||
}
|
||||
|
||||
/* Fuse memset builtins if possible. */
|
||||
if (partitions->length () > 1)
|
||||
fuse_memset_builtins (partitions);
|
||||
}
|
||||
|
||||
/* Distributes the code from LOOP in such a way that producer statements
|
||||
|
|
|
@ -1550,9 +1550,6 @@ record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
|
|||
bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
|
||||
}
|
||||
|
||||
static tree
|
||||
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
|
||||
|
||||
/* Record a group of TYPE. */
|
||||
|
||||
static struct iv_group *
|
||||
|
@ -2863,7 +2860,7 @@ strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
|
|||
|
||||
/* Strips constant offsets from EXPR and stores them to OFFSET. */
|
||||
|
||||
static tree
|
||||
tree
|
||||
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
|
||||
{
|
||||
HOST_WIDE_INT off;
|
||||
|
|
|
@ -28,6 +28,7 @@ extern void dump_cand (FILE *, struct iv_cand *);
|
|||
extern bool contains_abnormal_ssa_name_p (tree);
|
||||
extern struct loop *outermost_invariant_loop_for_expr (struct loop *, tree);
|
||||
extern bool expr_invariant_in_loop_p (struct loop *, tree);
|
||||
extern tree strip_offset (tree, unsigned HOST_WIDE_INT *);
|
||||
bool may_be_nonaddressable_p (tree expr);
|
||||
void tree_ssa_iv_optimize (void);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue