reorg.c (fill_slots_from_thread): Improve for TARGET_FLAGS_REGNUM
This handles TARGET_FLAGS_REGNUM clobbering insns as delay-slot
fillers using a method similar to that in commit 33c2207d3f,
where care was taken for fill_simple_delay_slots to allow such
insns when scanning for delay-slot fillers *backwards* (before
the insn).
A TARGET_FLAGS_REGNUM target is typically a former cc0 target.
For cc0 targets, insns don't mention clobbering cc0, so the
clobbers are mentioned in the "resources" only as a special
entity and only for compare-insns and branches, where the cc0
value matters.
In contrast, with TARGET_FLAGS_REGNUM, most insns clobber it and
the register liveness detection in reorg.c / resource.c treats
that as a blocker (for other insns mentioning it, i.e. most)
when looking for delay-slot-filling candidates. This means that
when comparing code and performance for a delay-slot cc0 target
before and after the de-cc0 conversion, the inability to fill a
delay slot after conversion manifests as a regression. This was
one such case, for CRIS, with random_bitstring in
gcc.c-torture/execute/arith-rand-ll.c as well as the target
libgcc division function.
After this, all known performance regressions compared to cc0
are fixed.
gcc:
PR target/93372
* reorg.c (fill_slots_from_thread): Allow trial insns that clobber
TARGET_FLAGS_REGNUM as delay-slot fillers.
gcc/testsuite:
PR target/93372
* gcc.target/cris/pr93372-47.c: New test.
This commit is contained in:
parent
baf917af01
commit
0e6c51de8e
2 changed files with 85 additions and 1 deletions
37
gcc/reorg.c
37
gcc/reorg.c
|
@ -2412,6 +2412,21 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
|
|||
CLEAR_RESOURCE (&needed);
|
||||
CLEAR_RESOURCE (&set);
|
||||
|
||||
/* Handle the flags register specially, to be able to accept a
|
||||
candidate that clobbers it. See also fill_simple_delay_slots. */
|
||||
bool filter_flags
|
||||
= (slots_to_fill == 1
|
||||
&& targetm.flags_regnum != INVALID_REGNUM
|
||||
&& find_regno_note (insn, REG_DEAD, targetm.flags_regnum));
|
||||
struct resources fset;
|
||||
struct resources flags_res;
|
||||
if (filter_flags)
|
||||
{
|
||||
CLEAR_RESOURCE (&fset);
|
||||
CLEAR_RESOURCE (&flags_res);
|
||||
SET_HARD_REG_BIT (flags_res.regs, targetm.flags_regnum);
|
||||
}
|
||||
|
||||
/* If we do not own this thread, we must stop as soon as we find
|
||||
something that we can't put in a delay slot, since all we can do
|
||||
is branch into THREAD at a later point. Therefore, labels stop
|
||||
|
@ -2440,8 +2455,18 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
|
|||
/* If TRIAL conflicts with the insns ahead of it, we lose. Also,
|
||||
don't separate or copy insns that set and use CC0. */
|
||||
if (! insn_references_resource_p (trial, &set, true)
|
||||
&& ! insn_sets_resource_p (trial, &set, true)
|
||||
&& ! insn_sets_resource_p (trial, filter_flags ? &fset : &set, true)
|
||||
&& ! insn_sets_resource_p (trial, &needed, true)
|
||||
/* If we're handling sets to the flags register specially, we
|
||||
only allow an insn into a delay-slot, if it either:
|
||||
- doesn't set the flags register,
|
||||
- the "set" of the flags register isn't used (clobbered),
|
||||
- insns between the delay-slot insn and the trial-insn
|
||||
as accounted in "set", have not affected the flags register. */
|
||||
&& (! filter_flags
|
||||
|| ! insn_sets_resource_p (trial, &flags_res, true)
|
||||
|| find_regno_note (trial, REG_UNUSED, targetm.flags_regnum)
|
||||
|| ! TEST_HARD_REG_BIT (set.regs, targetm.flags_regnum))
|
||||
&& (!HAVE_cc0 || (! (reg_mentioned_p (cc0_rtx, pat)
|
||||
&& (! own_thread || ! sets_cc0_p (pat)))))
|
||||
&& ! can_throw_internal (trial))
|
||||
|
@ -2619,6 +2644,16 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition,
|
|||
lose = 1;
|
||||
mark_set_resources (trial, &set, 0, MARK_SRC_DEST_CALL);
|
||||
mark_referenced_resources (trial, &needed, true);
|
||||
if (filter_flags)
|
||||
{
|
||||
mark_set_resources (trial, &fset, 0, MARK_SRC_DEST_CALL);
|
||||
|
||||
/* Groups of flags-register setters with users should not
|
||||
affect opportunities to move flags-register-setting insns
|
||||
(clobbers) into the delay-slot. */
|
||||
CLEAR_HARD_REG_BIT (needed.regs, targetm.flags_regnum);
|
||||
CLEAR_HARD_REG_BIT (fset.regs, targetm.flags_regnum);
|
||||
}
|
||||
|
||||
/* Ensure we don't put insns between the setting of cc and the comparison
|
||||
by moving a setting of cc into an earlier delay slot since these insns
|
||||
|
|
49
gcc/testsuite/gcc.target/cris/pr93372-47.c
Normal file
49
gcc/testsuite/gcc.target/cris/pr93372-47.c
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -march=v10" } */
|
||||
/* { dg-final { scan-assembler-times {\tnop} 1 } } */
|
||||
|
||||
/* A somewhat brittle test-case, checking that we have (only) one
|
||||
unfilled delay-slot in random_bitstring: there might be none or two
|
||||
or more, and general improvements may lead to unfilled delay-slots.
|
||||
When the scan-assembler-times directive regresses, re-run
|
||||
gcc.c-torture/execute/arith-rand-ll.c, check cycle-level
|
||||
execution-time regressions in random_bitstring and take appropriate
|
||||
action. */
|
||||
|
||||
static long long
|
||||
simple_rand ()
|
||||
{
|
||||
static unsigned long long seed = 47114711;
|
||||
unsigned long long this = seed * 1103515245 + 12345;
|
||||
seed = this;
|
||||
return this >> 8;
|
||||
}
|
||||
|
||||
unsigned long long int
|
||||
random_bitstring ()
|
||||
{
|
||||
unsigned long long int x;
|
||||
int n_bits;
|
||||
long long ran;
|
||||
int tot_bits = 0;
|
||||
|
||||
x = 0;
|
||||
for (;;)
|
||||
{
|
||||
ran = simple_rand ();
|
||||
n_bits = (ran >> 1) % 16;
|
||||
tot_bits += n_bits;
|
||||
|
||||
if (n_bits == 0)
|
||||
return x;
|
||||
else
|
||||
{
|
||||
x <<= n_bits;
|
||||
if (ran & 1)
|
||||
x |= (1 << n_bits) - 1;
|
||||
|
||||
if (tot_bits > 8 * sizeof (long long) + 6)
|
||||
return x;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue