aarch64: Add missing early-ra bookkeeping [PR113295]

416.gamess showed up two wrong-code bugs in early-ra.  This patch
fixes the first of them.  It was difficult to reduce the source code
to something that would meaningfully show the situation, so the
testcase uses a direct RTL sequence instead.

In the sequence:

(a) register <2> is set more than once
(b) register <2> is copied to a temporary (<4>)
(c) register <2> is the destination of an FCSEL between <4> and
    another value (<5>)
(d) <4> and <2> are equivalent for <4>'s live range
(e) <5>'s and <2>'s live ranges do not intersect, and there is
    a pseudo-copy between <5> and <2>

On its own, (d) implies that <4> can be treated as equivalent to <2>.
And on its own, (e) implies that <5> can share <2>'s register.  But
<4>'s and <5>'s live ranges conflict, meaning that they cannot both
share the register together.  A bit of missing bookkeeping meant that
the mechanism for detecting this didn't fire.  We therefore ended up
with an FCSEL in which both inputs were the same register.

gcc/
	PR target/113295
	* config/aarch64/aarch64-early-ra.cc
	(early_ra::find_related_start): Account for definitions by shared
	registers when testing for a single register definition.
	(early_ra::accumulate_defs): New function.
	(early_ra::record_copy): If A shares B's register, fold A's
	definition information into B's.  Fold A's use information into B's.

gcc/testsuite/
	PR target/113295
	* gcc.dg/rtl/aarch64/pr113295-1.c: New test.
This commit is contained in:
Richard Sandiford 2024-02-23 14:12:54 +00:00
parent 7f2cf0c45f
commit 8a16e06da9
2 changed files with 80 additions and 0 deletions

View file

@ -440,6 +440,7 @@ private:
void record_allocno_use (allocno_info *);
void record_allocno_def (allocno_info *);
allocno_info *find_related_start (allocno_info *, allocno_info *, bool);
void accumulate_defs (allocno_info *, allocno_info *);
void record_copy (rtx, rtx, bool = false);
void record_constraints (rtx_insn *);
void record_artificial_refs (unsigned int);
@ -1569,6 +1570,8 @@ early_ra::find_related_start (allocno_info *dest_allocno,
}
if (dest_allocno->group_size != 1
// Account for definitions by shared registers.
|| dest_allocno->num_defs > 1
|| DF_REG_DEF_COUNT (dest_allocno->group ()->regno) != 1)
// Currently only single allocnos that are defined once can
// share registers with non-equivalent allocnos. This could be
@ -1593,6 +1596,20 @@ early_ra::find_related_start (allocno_info *dest_allocno,
}
}
// Add FROM_ALLOCNO's definition information to TO_ALLOCNO's.
void
early_ra::accumulate_defs (allocno_info *to_allocno,
allocno_info *from_allocno)
{
if (from_allocno->num_defs > 0)
{
to_allocno->num_defs = MIN (from_allocno->num_defs
+ to_allocno->num_defs, 2);
to_allocno->last_def_point = MAX (to_allocno->last_def_point,
from_allocno->last_def_point);
}
}
// Record any relevant allocno-related information for an actual or imagined
// copy from SRC to DEST. FROM_MOVE_P is true if the copy was an explicit
// move instruction, false if it represents one way of satisfying the previous
@ -1687,6 +1704,16 @@ early_ra::record_copy (rtx dest, rtx src, bool from_move_p)
next_allocno->related_allocno = src_allocno->id;
next_allocno->is_equiv = (start_allocno == dest_allocno
&& from_move_p);
// If we're sharing two allocnos that are not equivalent,
// carry across the definition information. This is needed
// to prevent multiple incompatible attempts to share with
// the same register.
if (next_allocno->is_shared ())
accumulate_defs (src_allocno, next_allocno);
src_allocno->last_use_point
= MAX (src_allocno->last_use_point,
next_allocno->last_use_point);
if (next_allocno == start_allocno)
break;
next_allocno = m_allocnos[next_allocno->copy_dest];

View file

@ -0,0 +1,53 @@
// { dg-options "-O2" }
// { dg-do run }
struct data {
double x;
double y;
long long cond;
double res;
};
void __RTL (startwith ("early_ra")) foo (struct data *ptr)
{
(function "foo"
(param "ptr"
(DECL_RTL (reg/v:DI <0> [ ptr ]))
(DECL_RTL_INCOMING (reg/v:DI x0 [ ptr ]))
) ;; param "ptr"
(insn-chain
(block 2
(edge-from entry (flags "FALLTHRU"))
(cnote 3 [bb 2] NOTE_INSN_BASIC_BLOCK)
(insn 4 (set (reg:DI <0>) (reg:DI x0)))
(insn 5 (set (reg:DF <1>) (mem:DF (reg:DI <0>) [1 S8 A8])))
(insn 6 (set (reg:DF <2>) (mem:DF (plus:DI (reg:DI <0>)
(const_int 8)) [1 S8 A8])))
(insn 7 (set (reg:DI <3>) (mem:DI (plus:DI (reg:DI <0>)
(const_int 16)) [1 S8 A8])))
(insn 8 (set (reg:CC cc) (compare:CC (reg:DI <3>) (const_int 0))))
(insn 9 (set (reg:DF <4>) (reg:DF <2>)))
(insn 10 (set (reg:DF <5>) (plus:DF (reg:DF <1>) (reg:DF <2>))))
(insn 11 (set (reg:DF <2>) (if_then_else:DF
(ge (reg:CC cc) (const_int 0))
(reg:DF <4>)
(reg:DF <5>))))
(insn 12 (set (mem:DF (plus:DI (reg:DI <0>)
(const_int 24)) [1 S8 A8])
(reg:DF <2>)))
(edge-to exit (flags "FALLTHRU"))
) ;; block 2
) ;; insn-chain
) ;; function
}
int
main (void)
{
struct data d1 = { 1, 2, -1, 0 };
struct data d2 = { 3, 4, 1, 0 };
foo (&d1);
foo (&d2);
if (d1.res != 3 || d2.res != 4)
__builtin_abort ();
}