ira: Add new hooks for callee-save vs spills [PR117477]

Following on from the discussion in:

  https://gcc.gnu.org/pipermail/gcc-patches/2025-February/675256.html

this patch removes TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE and
replaces it with two hooks: one that controls the cost of using an
extra callee-saved register and one that controls the cost of allocating
a frame for the first spill.

(The patch does not attempt to address the shrink-wrapping part of
the thread above.)

On AArch64, this is enough to fix PR117477, as verified by the new tests.
The patch does not change the SPEC2017 scores significantly.  (I saw a
slight improvement in fotonik3d and roms, but I'm not convinced that
the improvements are real.)

The patch makes IRA use caller saves for gcc.target/aarch64/pr103350-1.c,
which is a scan-dump correctness test that relies on not using
caller saves.  The decision to use caller saves looks appropriate,
and saves an instruction, so I've just added -fno-caller-saves
to the test options.

The x86 parts were written by Honza.

gcc/
	PR rtl-optimization/117477
	* config/aarch64/aarch64.cc (aarch64_count_saves): New function.
	(aarch64_count_above_hard_fp_saves, aarch64_callee_save_cost)
	(aarch64_frame_allocation_cost): Likewise.
	(TARGET_CALLEE_SAVE_COST): Define.
	(TARGET_FRAME_ALLOCATION_COST): Likewise.
	* config/i386/i386.cc (ix86_ira_callee_saved_register_cost_scale):
	Replace with...
	(ix86_callee_save_cost): ...this new hook.
	(TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE): Delete.
	(TARGET_CALLEE_SAVE_COST): Define.
	* target.h (spill_cost_type, frame_cost_type): New enums.
	* target.def (callee_save_cost, frame_allocation_cost): New hooks.
	(ira_callee_saved_register_cost_scale): Delete.
	* doc/tm.texi.in (TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE): Delete.
	(TARGET_CALLEE_SAVE_COST, TARGET_FRAME_ALLOCATION_COST): New hooks.
	* doc/tm.texi: Regenerate.
	* hard-reg-set.h (hard_reg_set_popcount): New function.
	* ira-color.cc (allocated_memory_p): New variable.
	(allocated_callee_save_regs): Likewise.
	(record_allocation): New function.
	(assign_hard_reg): Use targetm.frame_allocation_cost to model
	the cost of the first spill or first caller save.  Use
	targetm.callee_save_cost to model the cost of using new callee-saved
	registers.  Apply the exit rather than entry frequency to the cost
	of restoring a register or deallocating the frame.  Update the
	new variables above.
	(improve_allocation): Use record_allocation.
	(color): Initialize allocated_callee_save_regs.
	(ira_color): Initialize allocated_memory_p.
	* targhooks.h (default_callee_save_cost): Declare.
	(default_frame_allocation_cost): Likewise.
	* targhooks.cc (default_callee_save_cost): New function.
	(default_frame_allocation_cost): Likewise.

gcc/testsuite/
	PR rtl-optimization/117477
	* gcc.target/aarch64/callee_save_1.c: New test.
	* gcc.target/aarch64/callee_save_2.c: Likewise.
	* gcc.target/aarch64/callee_save_3.c: Likewise.
	* gcc.target/aarch64/pr103350-1.c: Add -fno-caller-saves.

Co-authored-by: Jan Hubicka <hubicka@ucw.cz>
This commit is contained in:
Richard Sandiford 2025-03-06 11:06:25 +00:00
parent 50cd997952
commit e836d80374
14 changed files with 458 additions and 38 deletions

View file

@ -15873,6 +15873,118 @@ aarch64_memory_move_cost (machine_mode mode, reg_class_t rclass_i, bool in)
: base + aarch64_tune_params.memmov_cost.store_int);
}
/* CALLEE_SAVED_REGS is the set of callee-saved registers that the
RA has already decided to use. Return the total number of registers
in class RCLASS that need to be saved and restored, including the
frame link registers. */
static int
aarch64_count_saves (const HARD_REG_SET &callee_saved_regs, reg_class rclass)
{
auto saved_gprs = callee_saved_regs & reg_class_contents[rclass];
auto nregs = hard_reg_set_popcount (saved_gprs);
if (TEST_HARD_REG_BIT (reg_class_contents[rclass], LR_REGNUM))
{
if (aarch64_needs_frame_chain ())
nregs += 2;
else if (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM))
nregs += 1;
}
return nregs;
}
/* CALLEE_SAVED_REGS is the set of callee-saved registers that the
RA has already decided to use. Return the total number of registers
that need to be saved above the hard frame pointer, including the
frame link registers. */
static int
aarch64_count_above_hard_fp_saves (const HARD_REG_SET &callee_saved_regs)
{
/* FP and Advanced SIMD registers are saved above the frame pointer
but SVE registers are saved below it. */
if (known_le (GET_MODE_SIZE (aarch64_reg_save_mode (V8_REGNUM)), 16U))
return aarch64_count_saves (callee_saved_regs, POINTER_AND_FP_REGS);
return aarch64_count_saves (callee_saved_regs, POINTER_REGS);
}
/* Implement TARGET_CALLEE_SAVE_COST.  Return 1 when the new save or
   restore can be paired with an existing one, MEM_COST when any frame
   allocation or deallocation can be folded into the access (or has
   already been paid for), and the default cost otherwise.  */
static int
aarch64_callee_save_cost (spill_cost_type spill_type, unsigned int regno,
			  machine_mode mode, unsigned int nregs, int mem_cost,
			  const HARD_REG_SET &callee_saved_regs,
			  bool existing_spill_p)
{
  if (nregs == 1)
    {
      if (GP_REGNUM_P (regno))
	{
	  /* With an odd number of GPRs already being saved, one more save
	     is assumed to turn an STR into an STP and an LDR into an LDP.
	     That should still cost more than not spilling at all (hence
	     the minimum cost of 1) but is usually cheaper than a separate
	     store or load.  */
	  if (aarch64_count_saves (callee_saved_regs, GENERAL_REGS) & 1)
	    return 1;
	}
      /* The same applies to FP registers when only the low 64 bits need
	 to be saved.  (STP/LDP can also replace STR/LDR for Q registers,
	 but that is less likely to be a saving.)  */
      else if (FP_REGNUM_P (regno)
	       && known_eq (GET_MODE_SIZE (aarch64_reg_save_mode (regno)), 8U)
	       && (aarch64_count_saves (callee_saved_regs, FP_REGS) & 1))
	return 1;
    }

  /* Only the first save needs to pay for allocating or deallocating the
     frame, and even then GPR, FPR and Advanced SIMD saves can fold the
     allocation and deallocation into the save and restore.  */
  if (existing_spill_p
      || GP_REGNUM_P (regno)
      || (FP_REGNUM_P (regno)
	  && known_le (GET_MODE_SIZE (aarch64_reg_save_mode (regno)), 16U)))
    return mem_cost;

  return default_callee_save_cost (spill_type, regno, mode, nregs, mem_cost,
				   callee_saved_regs, existing_spill_p);
}
/* Implement TARGET_FRAME_ALLOCATION_COST.  */
static int
aarch64_frame_allocation_cost (frame_cost_type,
			       const HARD_REG_SET &callee_saved_regs)
{
  /* The aim is to compare different ways of storing data on the stack,
     not to compare saving data against not saving it.  The cost is
     therefore 0 whenever a frame would be allocated anyway, either by:

       stp x29, x30, [sp, #-...]!

     to create a frame link, or by:

       str x30, [sp, #-...]!

     to save the link register.  The allocation and deallocation
     instructions are then the same however the data is stored.  (In the
     second case, saving an extra call-preserved register could turn the
     STR into an STP, but aarch64_callee_save_cost models that.)

     Otherwise, assume that the frame needs a separate subtraction to
     allocate it and a separate addition to deallocate it.  A register
     save can later reclaim this cost by using a predecrement store and
     a postincrement load.

     For simplicity, the addition or subtraction is given the same cost
     as a GPR move.  This could be parameterized if necessary.  */
  const bool frame_already_needed_p
    = aarch64_count_above_hard_fp_saves (callee_saved_regs) != 0;
  return (frame_already_needed_p
	  ? 0
	  : aarch64_tune_params.regmove_cost->GP2GP);
}
/* Implement TARGET_INSN_COST. We have the opportunity to do something
much more productive here, such as using insn attributes to cost things.
But we don't, not yet.
@ -31568,6 +31680,12 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST aarch64_memory_move_cost
#undef TARGET_CALLEE_SAVE_COST
#define TARGET_CALLEE_SAVE_COST aarch64_callee_save_cost
#undef TARGET_FRAME_ALLOCATION_COST
#define TARGET_FRAME_ALLOCATION_COST aarch64_frame_allocation_cost
#undef TARGET_MIN_DIVISIONS_FOR_RECIP_MUL
#define TARGET_MIN_DIVISIONS_FOR_RECIP_MUL aarch64_min_divisions_for_recip_mul

View file

@ -20600,12 +20600,27 @@ ix86_class_likely_spilled_p (reg_class_t rclass)
return false;
}
/* Implement TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE. */
/* Implement TARGET_CALLEE_SAVE_COST. */
static int
ix86_ira_callee_saved_register_cost_scale (int)
ix86_callee_save_cost (spill_cost_type, unsigned int hard_regno, machine_mode,
unsigned int, int mem_cost, const HARD_REG_SET &, bool)
{
return 1;
/* Account for the fact that push and pop are shorter and do their
own allocation and deallocation. */
if (GENERAL_REGNO_P (hard_regno))
{
/* push is 1 byte while typical spill is 4-5 bytes.
??? We probably should adjust size costs accordingly.
Costs are relative to reg-reg move that has 2 bytes for 32bit
and 3 bytes otherwise. */
if (optimize_function_for_size_p (cfun))
return 1;
/* Be sure that no cost table sets cost to 2, so we end up with 0. */
gcc_checking_assert (mem_cost > 2);
return mem_cost - 2;
}
return mem_cost;
}
/* Return true if a set of DST by the expression SRC should be allowed.
@ -27092,9 +27107,8 @@ ix86_libgcc_floating_mode_supported_p
#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P ix86_class_likely_spilled_p
#undef TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE
#define TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE \
ix86_ira_callee_saved_register_cost_scale
#undef TARGET_CALLEE_SAVE_COST
#define TARGET_CALLEE_SAVE_COST ix86_callee_save_cost
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \

View file

@ -3047,14 +3047,6 @@ A target hook which can change allocno class for given pseudo from
The default version of this target hook always returns given class.
@end deftypefn
@deftypefn {Target Hook} int TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE (int @var{hard_regno})
A target hook which returns the callee-saved register @var{hard_regno}
cost scale in epilogue and prologue used by IRA.
The default version of this target hook returns 1 if optimizing for
size, otherwise returns the entry block frequency.
@end deftypefn
@deftypefn {Target Hook} bool TARGET_LRA_P (void)
A target hook which returns true if we use LRA instead of reload pass.
@ -7011,6 +7003,75 @@ value to the result of that function. The arguments to that function
are the same as to this target hook.
@end deftypefn
@deftypefn {Target Hook} int TARGET_CALLEE_SAVE_COST (spill_cost_type @var{cost_type}, unsigned int @var{hard_regno}, machine_mode @var{mode}, unsigned int @var{nregs}, int @var{mem_cost}, const HARD_REG_SET @var{&allocated_callee_regs}, bool @var{existing_spills_p})
Return the one-off cost of saving or restoring callee-saved registers
(also known as call-preserved registers or non-volatile registers).
The parameters are as follows:
@itemize
@item
@var{cost_type} is @samp{spill_cost_type::SAVE} for saving a register
and @samp{spill_cost_type::RESTORE} for restoring a register.
@item
@var{hard_regno} and @var{mode} represent the whole register that
the register allocator is considering using; of these,
@var{nregs} registers are fully or partially callee-saved.
@item
@var{mem_cost} is the normal cost for storing (for saves)
or loading (for restores) the @var{nregs} registers.
@item
@var{allocated_callee_regs} is the set of callee-saved registers
that are already in use.
@item
@var{existing_spills_p} is true if the register allocator has
already decided to spill registers to memory.
@end itemize
If @var{existing_spills_p} is false, the cost of a save should account
for frame allocations in a way that is consistent with
@code{TARGET_FRAME_ALLOCATION_COST}'s handling of allocations for spills.
Similarly, the cost of a restore should then account for frame deallocations
in a way that is consistent with @code{TARGET_FRAME_ALLOCATION_COST}'s
handling of deallocations.
Note that this hook should not attempt to apply a frequency scale
to the cost: it is the caller's responsibility to do that where
appropriate.
The default implementation returns @var{mem_cost}, plus the allocation
or deallocation cost returned by @code{TARGET_FRAME_ALLOCATION_COST},
where appropriate.
@end deftypefn
@deftypefn {Target Hook} int TARGET_FRAME_ALLOCATION_COST (frame_cost_type @var{cost_type}, const HARD_REG_SET @var{&allocated_callee_regs})
Return the cost of allocating or deallocating a frame for the sake of
a spill; @var{cost_type} chooses between allocation and deallocation.
The term ``spill'' here includes both forcing a pseudo register to memory
and using caller-saved registers for pseudo registers that are live across
a call.
This hook is only called if the register allocator has not so far
decided to spill. The allocator may have decided to use callee-saved
registers; if so, @var{allocated_callee_regs} is the set of callee-saved
registers that the allocator has used. There might also be other reasons
why a stack frame is already needed; for example, @samp{get_frame_size ()}
might be nonzero, or the target might already require a frame for
target-specific reasons.
When the register allocator uses this hook to cost spills, it also uses
@code{TARGET_CALLEE_SAVE_COST} to cost new callee-saved registers, passing
@samp{false} as the @var{existing_spills_p} argument. The intention is to
allow the target to apply an apples-for-apples comparison between the
cost of using callee-saved registers and using spills in cases where the
allocator has not yet committed to using both strategies.
The default implementation returns 0.
@end deftypefn
@defmac BRANCH_COST (@var{speed_p}, @var{predictable_p})
A C expression for the cost of a branch instruction. A value of 1 is
the default; other values are interpreted relative to that. Parameter

View file

@ -2388,8 +2388,6 @@ in the reload pass.
@hook TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS
@hook TARGET_IRA_CALLEE_SAVED_REGISTER_COST_SCALE
@hook TARGET_LRA_P
@hook TARGET_REGISTER_PRIORITY
@ -4584,6 +4582,10 @@ These macros are obsolete, new ports should use the target hook
@hook TARGET_MEMORY_MOVE_COST
@hook TARGET_CALLEE_SAVE_COST
@hook TARGET_FRAME_ALLOCATION_COST
@defmac BRANCH_COST (@var{speed_p}, @var{predictable_p})
A C expression for the cost of a branch instruction. A value of 1 is
the default; other values are interpreted relative to that. Parameter

View file

@ -191,6 +191,12 @@ hard_reg_set_empty_p (const_hard_reg_set x)
return x == HARD_CONST (0);
}
/* Return the number of bits that are set in X, i.e. the number of
   registers in the set.  This variant passes X directly to
   popcount_hwi.  */
inline int
hard_reg_set_popcount (const_hard_reg_set x)
{
return popcount_hwi (x);
}
#else
inline void
@ -254,6 +260,15 @@ hard_reg_set_empty_p (const_hard_reg_set x)
bad |= x.elts[i];
return bad == 0;
}
/* Return the number of bits that are set in X, i.e. the number of
   registers in the set, by summing the population count of each
   element of X.  */
inline int
hard_reg_set_popcount (const_hard_reg_set x)
{
  int total = 0;
  for (const auto &elt : x.elts)
    total += popcount_hwi (elt);
  return total;
}
#endif
/* Iterator for hard register sets. */

View file

@ -1195,10 +1195,16 @@ finish_update_cost_records (void)
update_cost_record_pool.release ();
}
/* True if we have allocated memory, or intend to do so. */
static bool allocated_memory_p;
/* Array whose element value is TRUE if the corresponding hard
register was already allocated for an allocno. */
static bool allocated_hardreg_p[FIRST_PSEUDO_REGISTER];
/* Which callee-saved hard registers we've decided to save. */
static HARD_REG_SET allocated_callee_save_regs;
/* Describes one element in a queue of allocnos whose costs need to be
updated. Each allocno in the queue is known to have an allocno
class. */
@ -1740,6 +1746,20 @@ check_hard_reg_p (ira_allocno_t a, int hard_regno,
return j == nregs;
}
/* Note that the NREGS hard registers starting at HARD_REGNO are now
   allocated.  Registers seen here for the first time that the function's
   ABI does not fully clobber are also added to
   allocated_callee_save_regs.  */
static void
record_allocation (int hard_regno, int nregs)
{
  const int end_regno = hard_regno + nregs;
  for (int regno = hard_regno; regno < end_regno; ++regno)
    {
      /* Registers that were recorded earlier need no further work.  */
      if (allocated_hardreg_p[regno])
	continue;

      allocated_hardreg_p[regno] = true;
      if (!crtl->abi->clobbers_full_reg_p (regno))
	SET_HARD_REG_BIT (allocated_callee_save_regs, regno);
    }
}
/* Return number of registers needed to be saved and restored at
function prologue/epilogue if we allocate HARD_REGNO to hold value
of MODE. */
@ -1961,6 +1981,12 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
#endif
auto_bitmap allocnos_to_spill;
HARD_REG_SET soft_conflict_regs = {};
int entry_freq = REG_FREQ_FROM_BB (ENTRY_BLOCK_PTR_FOR_FN (cfun));
int exit_freq = REG_FREQ_FROM_BB (EXIT_BLOCK_PTR_FOR_FN (cfun));
int spill_cost = 0;
/* Whether we have spilled pseudos or used caller-saved registers for values
that are live across a call. */
bool existing_spills_p = allocated_memory_p || caller_save_needed;
ira_assert (! ALLOCNO_ASSIGNED_P (a));
get_conflict_and_start_profitable_regs (a, retry_p,
@ -1979,6 +2005,18 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
start_update_cost ();
mem_cost += ALLOCNO_UPDATED_MEMORY_COST (a);
if (!existing_spills_p)
{
auto entry_cost = targetm.frame_allocation_cost
(frame_cost_type::ALLOCATION, allocated_callee_save_regs);
spill_cost += entry_cost * entry_freq;
auto exit_cost = targetm.frame_allocation_cost
(frame_cost_type::DEALLOCATION, allocated_callee_save_regs);
spill_cost += exit_cost * exit_freq;
}
mem_cost += spill_cost;
ira_allocate_and_copy_costs (&ALLOCNO_UPDATED_HARD_REG_COSTS (a),
aclass, ALLOCNO_HARD_REG_COSTS (a));
a_costs = ALLOCNO_UPDATED_HARD_REG_COSTS (a);
@ -2175,16 +2213,37 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
/* We need to save/restore the hard register in
epilogue/prologue. Therefore we increase the cost. */
{
int nregs = hard_regno_nregs (hard_regno, mode);
add_cost = 0;
rclass = REGNO_REG_CLASS (hard_regno);
add_cost = ((ira_memory_move_cost[mode][rclass][0]
+ ira_memory_move_cost[mode][rclass][1])
* saved_nregs / hard_regno_nregs (hard_regno,
mode) - 1)
* targetm.ira_callee_saved_register_cost_scale (hard_regno);
auto entry_cost = targetm.callee_save_cost
(spill_cost_type::SAVE, hard_regno, mode, saved_nregs,
ira_memory_move_cost[mode][rclass][0] * saved_nregs / nregs,
allocated_callee_save_regs, existing_spills_p);
/* In the event of a tie between caller-save and callee-save,
prefer callee-save. We apply this to the entry cost rather
than the exit cost since the entry frequency must be at
least as high as the exit frequency. */
if (entry_cost > 1)
entry_cost -= 1;
add_cost += entry_cost * entry_freq;
auto exit_cost = targetm.callee_save_cost
(spill_cost_type::RESTORE, hard_regno, mode, saved_nregs,
ira_memory_move_cost[mode][rclass][1] * saved_nregs / nregs,
allocated_callee_save_regs, existing_spills_p);
add_cost += exit_cost * exit_freq;
cost += add_cost;
full_cost += add_cost;
}
}
if (ira_need_caller_save_p (a, hard_regno))
{
cost += spill_cost;
full_cost += spill_cost;
}
if (min_cost > cost)
min_cost = cost;
if (min_full_cost > full_cost)
@ -2211,11 +2270,13 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
fail:
if (best_hard_regno >= 0)
{
for (i = hard_regno_nregs (best_hard_regno, mode) - 1; i >= 0; i--)
allocated_hardreg_p[best_hard_regno + i] = true;
record_allocation (best_hard_regno,
hard_regno_nregs (best_hard_regno, mode));
spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs,
best_hard_regno);
}
else
allocated_memory_p = true;
if (! retry_p)
restore_costs_from_copies (a);
ALLOCNO_HARD_REGNO (a) = best_hard_regno;
@ -3368,8 +3429,7 @@ improve_allocation (void)
/* Assign the best chosen hard register to A. */
ALLOCNO_HARD_REGNO (a) = best;
for (j = nregs - 1; j >= 0; j--)
allocated_hardreg_p[best + j] = true;
record_allocation (best, nregs);
if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
fprintf (ira_dump_file, "Assigning %d to a%dr%d\n",
@ -5199,6 +5259,7 @@ color (void)
{
allocno_stack_vec.create (ira_allocnos_num);
memset (allocated_hardreg_p, 0, sizeof (allocated_hardreg_p));
CLEAR_HARD_REG_SET (allocated_callee_save_regs);
ira_initiate_assign ();
do_coloring ();
ira_finish_assign ();
@ -5327,10 +5388,14 @@ ira_color (void)
ira_allocno_iterator ai;
/* Setup updated costs. */
allocated_memory_p = false;
FOR_EACH_ALLOCNO (a, ai)
{
ALLOCNO_UPDATED_MEMORY_COST (a) = ALLOCNO_MEMORY_COST (a);
ALLOCNO_UPDATED_CLASS_COST (a) = ALLOCNO_CLASS_COST (a);
if (ALLOCNO_CLASS (a) == NO_REGS
&& !ira_equiv_no_lvalue_p (ALLOCNO_REGNO (a)))
allocated_memory_p = true;
}
if (ira_conflicts_p)
color ();

View file

@ -3775,6 +3775,81 @@ are the same as to this target hook.",
int, (machine_mode mode, reg_class_t rclass, bool in),
default_memory_move_cost)
/* Return the one-off cost of saving or restoring callee-saved registers.
   The hook returns an unscaled cost; the caller applies any frequency
   scale.  */
DEFHOOK
(callee_save_cost,
"Return the one-off cost of saving or restoring callee-saved registers\n\
(also known as call-preserved registers or non-volatile registers).\n\
The parameters are as follows:\n\
\n\
@itemize\n\
@item\n\
@var{cost_type} is @samp{spill_cost_type::SAVE} for saving a register\n\
and @samp{spill_cost_type::RESTORE} for restoring a register.\n\
\n\
@item\n\
@var{hard_regno} and @var{mode} represent the whole register that\n\
the register allocator is considering using; of these,\n\
@var{nregs} registers are fully or partially callee-saved.\n\
\n\
@item\n\
@var{mem_cost} is the normal cost for storing (for saves)\n\
or loading (for restores) the @var{nregs} registers.\n\
\n\
@item\n\
@var{allocated_callee_regs} is the set of callee-saved registers\n\
that are already in use.\n\
\n\
@item\n\
@var{existing_spills_p} is true if the register allocator has\n\
already decided to spill registers to memory.\n\
@end itemize\n\
\n\
If @var{existing_spills_p} is false, the cost of a save should account\n\
for frame allocations in a way that is consistent with\n\
@code{TARGET_FRAME_ALLOCATION_COST}'s handling of allocations for spills.\n\
Similarly, the cost of a restore should then account for frame deallocations\n\
in a way that is consistent with @code{TARGET_FRAME_ALLOCATION_COST}'s\n\
handling of deallocations.\n\
\n\
Note that this hook should not attempt to apply a frequency scale\n\
to the cost: it is the caller's responsibility to do that where\n\
appropriate.\n\
\n\
The default implementation returns @var{mem_cost}, plus the allocation\n\
or deallocation cost returned by @code{TARGET_FRAME_ALLOCATION_COST},\n\
where appropriate.",
int, (spill_cost_type cost_type, unsigned int hard_regno,
machine_mode mode, unsigned int nregs, int mem_cost,
const HARD_REG_SET &allocated_callee_regs, bool existing_spills_p),
default_callee_save_cost)
/* Return the cost of allocating or deallocating a frame for the sake of
   a spill.  The hook is only called while the register allocator has not
   yet decided to spill.  */
DEFHOOK
(frame_allocation_cost,
"Return the cost of allocating or deallocating a frame for the sake of\n\
a spill; @var{cost_type} chooses between allocation and deallocation.\n\
The term ``spill'' here includes both forcing a pseudo register to memory\n\
and using caller-saved registers for pseudo registers that are live across\n\
a call.\n\
\n\
This hook is only called if the register allocator has not so far\n\
decided to spill. The allocator may have decided to use callee-saved\n\
registers; if so, @var{allocated_callee_regs} is the set of callee-saved\n\
registers that the allocator has used. There might also be other reasons\n\
why a stack frame is already needed; for example, @samp{get_frame_size ()}\n\
might be nonzero, or the target might already require a frame for\n\
target-specific reasons.\n\
\n\
When the register allocator uses this hook to cost spills, it also uses\n\
@code{TARGET_CALLEE_SAVE_COST} to cost new callee-saved registers, passing\n\
@samp{false} as the @var{existing_spills_p} argument. The intention is to\n\
allow the target to apply an apples-for-apples comparison between the\n\
cost of using callee-saved registers and using spills in cases where the\n\
allocator has not yet committed to using both strategies.\n\
\n\
The default implementation returns 0.",
int, (frame_cost_type cost_type, const HARD_REG_SET &allocated_callee_regs),
default_frame_allocation_cost)
DEFHOOK
(use_by_pieces_infrastructure_p,
"GCC will attempt several strategies when asked to copy between\n\
@ -5714,18 +5789,6 @@ DEFHOOK
reg_class_t, (int, reg_class_t, reg_class_t),
default_ira_change_pseudo_allocno_class)
/* Scale of callee-saved register cost in epilogue and prologue used by
IRA. */
DEFHOOK
(ira_callee_saved_register_cost_scale,
"A target hook which returns the callee-saved register @var{hard_regno}\n\
cost scale in epilogue and prologue used by IRA.\n\
\n\
The default version of this target hook returns 1 if optimizing for\n\
size, otherwise returns the entry block frequency.",
int, (int hard_regno),
default_ira_callee_saved_register_cost_scale)
/* Return true if we use LRA instead of reload. */
DEFHOOK
(lra_p,

View file

@ -284,6 +284,18 @@ enum poly_value_estimate_kind
POLY_VALUE_LIKELY
};
/* The kind of cost that TARGET_CALLEE_SAVE_COST is being asked for:
   SAVE for saving a callee-saved register and RESTORE for restoring
   one.  */
enum class spill_cost_type
{
SAVE,
RESTORE
};
/* The kind of cost that TARGET_FRAME_ALLOCATION_COST is being asked for:
   ALLOCATION for allocating a frame and DEALLOCATION for deallocating
   it.  */
enum class frame_cost_type
{
ALLOCATION,
DEALLOCATION
};
typedef void (*emit_support_tinfos_callback) (tree);
extern bool verify_type_context (location_t, type_context_kind, const_tree,

View file

@ -2083,6 +2083,33 @@ default_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
#endif
}
/* The default implementation of TARGET_CALLEE_SAVE_COST.  Return MEM_COST,
   plus the frame allocation cost (for saves) or deallocation cost (for
   restores) if the allocator has not yet decided to spill.  */
int
default_callee_save_cost (spill_cost_type spill_type, unsigned int,
			  machine_mode, unsigned int, int mem_cost,
			  const HARD_REG_SET &callee_saved_regs,
			  bool existing_spills_p)
{
  /* With existing spills, nothing is added for the frame.  */
  if (existing_spills_p)
    return mem_cost;

  frame_cost_type frame_type = frame_cost_type::DEALLOCATION;
  if (spill_type == spill_cost_type::SAVE)
    frame_type = frame_cost_type::ALLOCATION;

  return mem_cost + targetm.frame_allocation_cost (frame_type,
						   callee_saved_regs);
}
/* The default implementation of TARGET_FRAME_ALLOCATION_COST.  Both
   arguments are ignored and the returned cost is always 0, so frame
   allocation and deallocation are treated as free.  */
int
default_frame_allocation_cost (frame_cost_type, const HARD_REG_SET &)
{
return 0;
}
/* The default implementation of TARGET_SLOW_UNALIGNED_ACCESS. */
bool

View file

@ -235,6 +235,11 @@ extern tree default_builtin_tm_load_store (tree);
extern int default_memory_move_cost (machine_mode, reg_class_t, bool);
extern int default_register_move_cost (machine_mode, reg_class_t,
reg_class_t);
extern int default_callee_save_cost (spill_cost_type, unsigned int,
machine_mode, unsigned int, int,
const HARD_REG_SET &, bool);
extern int default_frame_allocation_cost (frame_cost_type,
const HARD_REG_SET &);
extern bool default_slow_unaligned_access (machine_mode, unsigned int);
extern HOST_WIDE_INT default_estimated_poly_value (poly_int64,
poly_value_estimate_kind);

View file

@ -0,0 +1,12 @@
/* { dg-options "-O2" } */
int test (int x), test2 (int x);
int foo (int x, int y) {
test (x);
int lhs = test2 (y);
return x + lhs;
}
/* { dg-final { scan-assembler {\tstp\tx19, x20, \[sp,} } } */
/* { dg-final { scan-assembler {\tldp\tx19, x20, \[sp,} } } */

View file

@ -0,0 +1,14 @@
/* { dg-options "-O2 -fomit-frame-pointer" } */
int test (int x), test2 (int x);
int foo (int x, int y) {
test (x);
int lhs = test2 (y);
return x + lhs;
}
/* { dg-final { scan-assembler {\tstp\tx30, x19, \[sp,} } } */
/* { dg-final { scan-assembler {\tldp\tx30, x19, \[sp\],} } } */
/* { dg-final { scan-assembler {\tstr\tw1, \[sp,} } } */
/* { dg-final { scan-assembler {\tldr\tw0, \[sp,} } } */

View file

@ -0,0 +1,12 @@
/* { dg-options "-O2" } */
float test ();
float g;
float foo (float x, float y) {
g = x + test ();
return (x + test ()) * y;
}
/* { dg-final { scan-assembler {\tstp\td14, d15, \[sp,} } } */
/* { dg-final { scan-assembler {\tldp\td14, d15, \[sp,} } } */

View file

@ -1,5 +1,5 @@
/* { dg-do run { target le } } */
/* { dg-additional-options "-Os -fno-tree-ter -save-temps -fdump-rtl-ree-all -free -std=c99 -w" } */
/* { dg-additional-options "-Os -fno-tree-ter -save-temps -fdump-rtl-ree-all -free -std=c99 -w -fno-caller-saves" } */
typedef unsigned char u8;
typedef unsigned char __attribute__((__vector_size__ (8))) v64u8;