_BitInt lowering support [PR102989]
The following patch adds a new bitintlower lowering pass which lowers most operations on medium _BitInt into operations on corresponding integer types, large _BitInt into straight line code operating on 2 or more limbs and finally huge _BitInt into a loop plus optional straight line code. As the only supported architecture is little-endian, the lowering only supports little-endian for now, because it would be impossible to test it all for big-endian. The rest is written with any-endian support in mind, but of course only little-endian has actually been tested. I hope it is ok to add big-endian support to the lowering pass incrementally later, when the first big-endian target shows up with backend support. There are 2 possibilities for adding such support. One would be the minimal one: just tweak the limb_access function and perhaps one or two other spots, and transform the indexes there from little endian (index 0 is least significant) to big endian for just the memory access. The advantage is, I think, lower maintenance cost; the disadvantage is that the loops will still iterate from 0 to some number of limbs and we'd rely on IVOPTs or something similar changing it later if needed. Or we could make those indexes endian-dependent everywhere, though I'm afraid that would be several hundred changes. For switches indexed by large/huge _BitInt the patch invokes what the switch lowering pass does (but only on those specific switches, not all of them); the switch lowering breaks the switches into clusters and none of the clusters can have a range which doesn't fit into 64-bit UWHI, everything else will be turned into a tree of comparisons. For clusters normally emitted as smaller switches, because we already have a guarantee that the low .. high range is at most 64 bits, the patch forces subtraction of the low and turns it into a 64-bit switch. This is done before the actual pass starts.
Similarly, we cancel lowering of certain constructs like ABS_EXPR, ABSU_EXPR, MIN_EXPR, MAX_EXPR and COND_EXPR and turn those back to simpler comparisons etc., so that fewer operations need to be lowered later. 2023-09-06 Jakub Jelinek <jakub@redhat.com> PR c/102989 * Makefile.in (OBJS): Add gimple-lower-bitint.o. * passes.def: Add pass_lower_bitint after pass_lower_complex and pass_lower_bitint_O0 after pass_lower_complex_O0. * tree-pass.h (PROP_gimple_lbitint): Define. (make_pass_lower_bitint_O0, make_pass_lower_bitint): Declare. * gimple-lower-bitint.h: New file. * tree-ssa-live.h (struct _var_map): Add bitint member. (init_var_map): Adjust declaration. (region_contains_p): Handle map->bitint like map->outofssa_p. * tree-ssa-live.cc (init_var_map): Add BITINT argument, initialize map->bitint and set map->outofssa_p to false if it is non-NULL. * tree-ssa-coalesce.cc: Include gimple-lower-bitint.h. (build_ssa_conflict_graph): Call build_bitint_stmt_ssa_conflicts if map->bitint. (create_coalesce_list_for_region): For map->bitint ignore SSA_NAMEs not in that bitmap, and allow res without default def. (compute_optimized_partition_bases): In map->bitint mode try hard to coalesce any SSA_NAMEs with the same size. (coalesce_bitint): New function. (coalesce_ssa_name): In map->bitint mode, or map->bitmap into used_in_copies and call coalesce_bitint. * gimple-lower-bitint.cc: New file.
This commit is contained in:
parent
4f4fa25011
commit
a9d6c7fbeb
8 changed files with 6270 additions and 6 deletions
|
@ -1457,6 +1457,7 @@ OBJS = \
|
|||
gimple-loop-jam.o \
|
||||
gimple-loop-versioning.o \
|
||||
gimple-low.o \
|
||||
gimple-lower-bitint.o \
|
||||
gimple-predicate-analysis.o \
|
||||
gimple-pretty-print.o \
|
||||
gimple-range.o \
|
||||
|
|
6074
gcc/gimple-lower-bitint.cc
Normal file
6074
gcc/gimple-lower-bitint.cc
Normal file
File diff suppressed because it is too large
Load diff
31
gcc/gimple-lower-bitint.h
Normal file
31
gcc/gimple-lower-bitint.h
Normal file
|
@ -0,0 +1,31 @@
|
|||
/* Header file for gimple-lower-bitint.cc exports.
|
||||
Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GCC_GIMPLE_LOWER_BITINT_H
|
||||
#define GCC_GIMPLE_LOWER_BITINT_H
|
||||
|
||||
class live_track;
|
||||
struct ssa_conflicts;
|
||||
extern void build_bitint_stmt_ssa_conflicts (gimple *, live_track *,
|
||||
ssa_conflicts *, bitmap,
|
||||
void (*) (live_track *, tree,
|
||||
ssa_conflicts *),
|
||||
void (*) (live_track *, tree));
|
||||
|
||||
#endif /* GCC_GIMPLE_LOWER_BITINT_H */
|
|
@ -237,6 +237,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
NEXT_PASS (pass_tail_recursion);
|
||||
NEXT_PASS (pass_ch);
|
||||
NEXT_PASS (pass_lower_complex);
|
||||
NEXT_PASS (pass_lower_bitint);
|
||||
NEXT_PASS (pass_sra);
|
||||
/* The dom pass will also resolve all __builtin_constant_p calls
|
||||
that are still there to 0. This has to be done after some
|
||||
|
@ -386,6 +387,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
NEXT_PASS (pass_strip_predict_hints, false /* early_p */);
|
||||
/* Lower remaining pieces of GIMPLE. */
|
||||
NEXT_PASS (pass_lower_complex);
|
||||
NEXT_PASS (pass_lower_bitint);
|
||||
NEXT_PASS (pass_lower_vector_ssa);
|
||||
NEXT_PASS (pass_lower_switch);
|
||||
/* Perform simple scalar cleanup which is constant/copy propagation. */
|
||||
|
@ -429,6 +431,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
NEXT_PASS (pass_lower_vaarg);
|
||||
NEXT_PASS (pass_lower_vector);
|
||||
NEXT_PASS (pass_lower_complex_O0);
|
||||
NEXT_PASS (pass_lower_bitint_O0);
|
||||
NEXT_PASS (pass_sancov_O0);
|
||||
NEXT_PASS (pass_lower_switch_O0);
|
||||
NEXT_PASS (pass_asan_O0);
|
||||
|
|
|
@ -229,6 +229,7 @@ protected:
|
|||
have completed. */
|
||||
#define PROP_assumptions_done (1 << 19) /* Assume function kept
|
||||
around. */
|
||||
#define PROP_gimple_lbitint (1 << 20) /* lowered large _BitInt */
|
||||
|
||||
#define PROP_gimple \
|
||||
(PROP_gimple_any | PROP_gimple_lcf | PROP_gimple_leh | PROP_gimple_lomp)
|
||||
|
@ -420,6 +421,8 @@ extern gimple_opt_pass *make_pass_strip_predict_hints (gcc::context *ctxt);
|
|||
extern gimple_opt_pass *make_pass_rebuild_frequencies (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_complex_O0 (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_complex (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_bitint_O0 (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_bitint (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_switch (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_switch_O0 (gcc::context *ctxt);
|
||||
extern gimple_opt_pass *make_pass_lower_vector (gcc::context *ctxt);
|
||||
|
|
|
@ -38,6 +38,7 @@ along with GCC; see the file COPYING3. If not see
|
|||
#include "explow.h"
|
||||
#include "tree-dfa.h"
|
||||
#include "stor-layout.h"
|
||||
#include "gimple-lower-bitint.h"
|
||||
|
||||
/* This set of routines implements a coalesce_list. This is an object which
|
||||
is used to track pairs of ssa_names which are desirable to coalesce
|
||||
|
@ -914,6 +915,14 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
|
|||
else if (is_gimple_debug (stmt))
|
||||
continue;
|
||||
|
||||
if (map->bitint)
|
||||
{
|
||||
build_bitint_stmt_ssa_conflicts (stmt, live, graph, map->bitint,
|
||||
live_track_process_def,
|
||||
live_track_process_use);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* For stmts with more than one SSA_NAME definition pretend all the
|
||||
SSA_NAME outputs but the first one are live at this point, so
|
||||
that conflicts are added in between all those even when they are
|
||||
|
@ -1058,6 +1067,8 @@ create_coalesce_list_for_region (var_map map, bitmap used_in_copy)
|
|||
if (virtual_operand_p (res))
|
||||
continue;
|
||||
ver = SSA_NAME_VERSION (res);
|
||||
if (map->bitint && !bitmap_bit_p (map->bitint, ver))
|
||||
continue;
|
||||
|
||||
/* Register ssa_names and coalesces between the args and the result
|
||||
of all PHI. */
|
||||
|
@ -1106,6 +1117,8 @@ create_coalesce_list_for_region (var_map map, bitmap used_in_copy)
|
|||
{
|
||||
v1 = SSA_NAME_VERSION (lhs);
|
||||
v2 = SSA_NAME_VERSION (rhs1);
|
||||
if (map->bitint && !bitmap_bit_p (map->bitint, v1))
|
||||
break;
|
||||
cost = coalesce_cost_bb (bb);
|
||||
add_coalesce (cl, v1, v2, cost);
|
||||
bitmap_set_bit (used_in_copy, v1);
|
||||
|
@ -1124,12 +1137,16 @@ create_coalesce_list_for_region (var_map map, bitmap used_in_copy)
|
|||
if (!rhs1)
|
||||
break;
|
||||
tree lhs = ssa_default_def (cfun, res);
|
||||
if (map->bitint && !lhs)
|
||||
break;
|
||||
gcc_assert (lhs);
|
||||
if (TREE_CODE (rhs1) == SSA_NAME
|
||||
&& gimple_can_coalesce_p (lhs, rhs1))
|
||||
{
|
||||
v1 = SSA_NAME_VERSION (lhs);
|
||||
v2 = SSA_NAME_VERSION (rhs1);
|
||||
if (map->bitint && !bitmap_bit_p (map->bitint, v1))
|
||||
break;
|
||||
cost = coalesce_cost_bb (bb);
|
||||
add_coalesce (cl, v1, v2, cost);
|
||||
bitmap_set_bit (used_in_copy, v1);
|
||||
|
@ -1177,6 +1194,8 @@ create_coalesce_list_for_region (var_map map, bitmap used_in_copy)
|
|||
|
||||
v1 = SSA_NAME_VERSION (outputs[match]);
|
||||
v2 = SSA_NAME_VERSION (input);
|
||||
if (map->bitint && !bitmap_bit_p (map->bitint, v1))
|
||||
continue;
|
||||
|
||||
if (gimple_can_coalesce_p (outputs[match], input))
|
||||
{
|
||||
|
@ -1651,6 +1670,33 @@ compute_optimized_partition_bases (var_map map, bitmap used_in_copies,
|
|||
}
|
||||
}
|
||||
|
||||
if (map->bitint
|
||||
&& flag_tree_coalesce_vars
|
||||
&& (optimize > 1 || parts < 500))
|
||||
for (i = 0; i < (unsigned) parts; ++i)
|
||||
{
|
||||
tree s1 = partition_to_var (map, i);
|
||||
int p1 = partition_find (tentative, i);
|
||||
for (unsigned j = i + 1; j < (unsigned) parts; ++j)
|
||||
{
|
||||
tree s2 = partition_to_var (map, j);
|
||||
if (s1 == s2)
|
||||
continue;
|
||||
if (tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (s1)),
|
||||
TYPE_SIZE (TREE_TYPE (s2))))
|
||||
{
|
||||
int p2 = partition_find (tentative, j);
|
||||
|
||||
if (p1 == p2)
|
||||
continue;
|
||||
|
||||
partition_union (tentative, p1, p2);
|
||||
if (partition_find (tentative, i) != p1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
map->partition_to_base_index = XCNEWVEC (int, parts);
|
||||
auto_vec<unsigned int> index_map (parts);
|
||||
if (parts)
|
||||
|
@ -1692,6 +1738,101 @@ compute_optimized_partition_bases (var_map map, bitmap used_in_copies,
|
|||
partition_delete (tentative);
|
||||
}
|
||||
|
||||
/* For the bitint lowering pass, try harder. Partitions which contain
|
||||
SSA_NAME default def of a PARM_DECL or have RESULT_DECL need to have
|
||||
compatible types because they will use that RESULT_DECL or PARM_DECL.
|
||||
Other partitions can have even incompatible _BitInt types, as long
|
||||
as they have the same size - those will use VAR_DECLs which are just
|
||||
arrays of the limbs. */
|
||||
|
||||
static void
|
||||
coalesce_bitint (var_map map, ssa_conflicts *graph)
|
||||
{
|
||||
unsigned n = num_var_partitions (map);
|
||||
if (optimize <= 1 && n > 500)
|
||||
return;
|
||||
|
||||
bool try_same_size = false;
|
||||
FILE *debug_file = (dump_flags & TDF_DETAILS) ? dump_file : NULL;
|
||||
for (unsigned i = 0; i < n; ++i)
|
||||
{
|
||||
tree s1 = partition_to_var (map, i);
|
||||
if ((unsigned) var_to_partition (map, s1) != i)
|
||||
continue;
|
||||
int v1 = SSA_NAME_VERSION (s1);
|
||||
for (unsigned j = i + 1; j < n; ++j)
|
||||
{
|
||||
tree s2 = partition_to_var (map, j);
|
||||
if (s1 == s2 || (unsigned) var_to_partition (map, s2) != j)
|
||||
continue;
|
||||
if (!types_compatible_p (TREE_TYPE (s1), TREE_TYPE (s2)))
|
||||
{
|
||||
if (!try_same_size
|
||||
&& tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (s1)),
|
||||
TYPE_SIZE (TREE_TYPE (s2))))
|
||||
try_same_size = true;
|
||||
continue;
|
||||
}
|
||||
int v2 = SSA_NAME_VERSION (s2);
|
||||
if (attempt_coalesce (map, graph, v1, v2, debug_file)
|
||||
&& partition_to_var (map, i) != s1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!try_same_size)
|
||||
return;
|
||||
|
||||
unsigned i;
|
||||
bitmap_iterator bi;
|
||||
bitmap same_type = NULL;
|
||||
|
||||
EXECUTE_IF_SET_IN_BITMAP (map->bitint, 0, i, bi)
|
||||
{
|
||||
tree s = ssa_name (i);
|
||||
if (!SSA_NAME_VAR (s))
|
||||
continue;
|
||||
if (TREE_CODE (SSA_NAME_VAR (s)) != RESULT_DECL
|
||||
&& (TREE_CODE (SSA_NAME_VAR (s)) != PARM_DECL
|
||||
|| !SSA_NAME_IS_DEFAULT_DEF (s)))
|
||||
continue;
|
||||
if (same_type == NULL)
|
||||
same_type = BITMAP_ALLOC (NULL);
|
||||
int p = var_to_partition (map, s);
|
||||
bitmap_set_bit (same_type, p);
|
||||
}
|
||||
|
||||
for (i = 0; i < n; ++i)
|
||||
{
|
||||
if (same_type && bitmap_bit_p (same_type, i))
|
||||
continue;
|
||||
tree s1 = partition_to_var (map, i);
|
||||
if ((unsigned) var_to_partition (map, s1) != i)
|
||||
continue;
|
||||
int v1 = SSA_NAME_VERSION (s1);
|
||||
for (unsigned j = i + 1; j < n; ++j)
|
||||
{
|
||||
if (same_type && bitmap_bit_p (same_type, j))
|
||||
continue;
|
||||
|
||||
tree s2 = partition_to_var (map, j);
|
||||
if (s1 == s2 || (unsigned) var_to_partition (map, s2) != j)
|
||||
continue;
|
||||
|
||||
if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (s1)),
|
||||
TYPE_SIZE (TREE_TYPE (s2))))
|
||||
continue;
|
||||
|
||||
int v2 = SSA_NAME_VERSION (s2);
|
||||
if (attempt_coalesce (map, graph, v1, v2, debug_file)
|
||||
&& partition_to_var (map, i) != s1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
BITMAP_FREE (same_type);
|
||||
}
|
||||
|
||||
/* Given an initial var_map MAP, coalesce variables and return a partition map
|
||||
with the resulting coalesce. Note that this function is called in either
|
||||
live range computation context or out-of-ssa context, indicated by MAP. */
|
||||
|
@ -1709,6 +1850,8 @@ coalesce_ssa_name (var_map map)
|
|||
if (map->outofssa_p)
|
||||
populate_coalesce_list_for_outofssa (cl, used_in_copies);
|
||||
bitmap_list_view (used_in_copies);
|
||||
if (map->bitint)
|
||||
bitmap_ior_into (used_in_copies, map->bitint);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
dump_var_map (dump_file, map);
|
||||
|
@ -1756,6 +1899,9 @@ coalesce_ssa_name (var_map map)
|
|||
((dump_flags & TDF_DETAILS) ? dump_file : NULL));
|
||||
|
||||
delete_coalesce_list (cl);
|
||||
|
||||
if (map->bitint && flag_tree_coalesce_vars)
|
||||
coalesce_bitint (map, graph);
|
||||
|
||||
ssa_conflicts_delete (graph);
|
||||
}
|
||||
|
||||
|
|
|
@ -77,10 +77,11 @@ var_map_base_fini (var_map map)
|
|||
}
|
||||
/* Create a variable partition map of SIZE for region, initialize and return
|
||||
it. Region is a loop if LOOP is non-NULL, otherwise is the current
|
||||
function. */
|
||||
function. If BITINT is non-NULL, only SSA_NAMEs from that bitmap
|
||||
will be coalesced. */
|
||||
|
||||
var_map
|
||||
init_var_map (int size, class loop *loop)
|
||||
init_var_map (int size, class loop *loop, bitmap bitint)
|
||||
{
|
||||
var_map map;
|
||||
|
||||
|
@ -109,7 +110,8 @@ init_var_map (int size, class loop *loop)
|
|||
else
|
||||
{
|
||||
map->bmp_bbs = NULL;
|
||||
map->outofssa_p = true;
|
||||
map->outofssa_p = bitint == NULL;
|
||||
map->bitint = bitint;
|
||||
basic_block bb;
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
map->vec_bbs.safe_push (bb);
|
||||
|
|
|
@ -70,6 +70,10 @@ typedef struct _var_map
|
|||
/* Vector of basic block in the region. */
|
||||
vec<basic_block> vec_bbs;
|
||||
|
||||
/* If non-NULL, only coalesce SSA_NAMEs from this bitmap, and try harder
|
||||
for those (for bitint lowering pass). */
|
||||
bitmap bitint;
|
||||
|
||||
/* True if this map is for out-of-ssa, otherwise for live range
|
||||
computation. When for out-of-ssa, it also means the var map is computed
|
||||
for whole current function. */
|
||||
|
@ -80,7 +84,7 @@ typedef struct _var_map
|
|||
/* Value used to represent no partition number. */
|
||||
#define NO_PARTITION -1
|
||||
|
||||
extern var_map init_var_map (int, class loop* = NULL);
|
||||
extern var_map init_var_map (int, class loop * = NULL, bitmap = NULL);
|
||||
extern void delete_var_map (var_map);
|
||||
extern int var_union (var_map, tree, tree);
|
||||
extern void partition_view_normal (var_map);
|
||||
|
@ -100,7 +104,7 @@ inline bool
|
|||
region_contains_p (var_map map, basic_block bb)
|
||||
{
|
||||
/* It's possible that the function is called with ENTRY_BLOCK/EXIT_BLOCK. */
|
||||
if (map->outofssa_p)
|
||||
if (map->outofssa_p || map->bitint)
|
||||
return (bb->index != ENTRY_BLOCK && bb->index != EXIT_BLOCK);
|
||||
|
||||
return bitmap_bit_p (map->bmp_bbs, bb->index);
|
||||
|
|
Loading…
Add table
Reference in a new issue