aarch64: Tighten early-ra chain test for wide registers [PR113295]
Most code in early-ra used is_chain_candidate to check whether we should chain two allocnos. This included both tests that matter for correctness and tests for certain heuristics. Once that test passes for one pair of allocnos, we test whether it's safe to chain the containing groups (which might contain multiple allocnos for x2, x3 and x4 modes). This test used an inline test for correctness only, deliberately skipping the heuristics. However, this instance of the test was missing some handling of equivalent allocnos. This patch fixes things by making is_chain_candidate take a strictness parameter: correctness only, or correctness + heuristics. It then makes the group-chaining test use the correctness version rather than trying to replicate it inline. gcc/ PR target/113295 * config/aarch64/aarch64-early-ra.cc (early_ra::test_strictness): New enum. (early_ra::is_chain_candidate): Add a strictness parameter to control whether only correctness matters, or whether both correctness and heuristics should be used. Handle multiple levels of equivalence. (early_ra::find_related_start): Update call accordingly. (early_ra::strided_polarity_pref): Likewise. (early_ra::form_chains): Likewise. (early_ra::try_to_chain_allocnos): Use is_chain_candidate in correctness mode rather than trying to inline the test. gcc/testsuite/ PR target/113295 * gcc.target/aarch64/pr113295-2.c: New test.
This commit is contained in:
parent
8a16e06da9
commit
9f105cfdc1
2 changed files with 82 additions and 23 deletions
|
@ -95,6 +95,10 @@ public:
|
|||
void execute ();
|
||||
|
||||
private:
|
||||
// Whether to test only things that are required for correctness,
|
||||
// or whether to take optimization heuristics into account as well.
|
||||
enum test_strictness { CORRECTNESS_ONLY, ALL_REASONS };
|
||||
|
||||
static_assert (MAX_RECOG_OPERANDS <= 32, "Operand mask is 32 bits");
|
||||
using operand_mask = uint32_t;
|
||||
|
||||
|
@ -452,7 +456,7 @@ private:
|
|||
|
||||
template<unsigned int allocno_info::*field>
|
||||
static int cmp_increasing (const void *, const void *);
|
||||
bool is_chain_candidate (allocno_info *, allocno_info *);
|
||||
bool is_chain_candidate (allocno_info *, allocno_info *, test_strictness);
|
||||
int rate_chain (allocno_info *, allocno_info *);
|
||||
static int cmp_chain_candidates (const void *, const void *);
|
||||
void chain_allocnos (unsigned int &, unsigned int &);
|
||||
|
@ -1588,7 +1592,7 @@ early_ra::find_related_start (allocno_info *dest_allocno,
|
|||
return res;
|
||||
|
||||
auto *next_allocno = m_allocnos[dest_allocno->copy_dest];
|
||||
if (!is_chain_candidate (dest_allocno, next_allocno))
|
||||
if (!is_chain_candidate (dest_allocno, next_allocno, ALL_REASONS))
|
||||
return res;
|
||||
|
||||
dest_allocno = next_allocno;
|
||||
|
@ -2011,7 +2015,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1,
|
|||
if (allocno1->offset + 1 < allocno1->group_size
|
||||
&& allocno2->offset + 1 < allocno2->group_size)
|
||||
{
|
||||
if (is_chain_candidate (allocno1 + 1, allocno2 + 1))
|
||||
if (is_chain_candidate (allocno1 + 1, allocno2 + 1, ALL_REASONS))
|
||||
return 1;
|
||||
else
|
||||
return -1;
|
||||
|
@ -2019,7 +2023,7 @@ early_ra::strided_polarity_pref (allocno_info *allocno1,
|
|||
|
||||
if (allocno1->offset > 0 && allocno2->offset > 0)
|
||||
{
|
||||
if (is_chain_candidate (allocno1 - 1, allocno2 - 1))
|
||||
if (is_chain_candidate (allocno1 - 1, allocno2 - 1, ALL_REASONS))
|
||||
return 1;
|
||||
else
|
||||
return -1;
|
||||
|
@ -2215,38 +2219,37 @@ early_ra::cmp_increasing (const void *allocno1_ptr, const void *allocno2_ptr)
|
|||
}
|
||||
|
||||
// Return true if we should consider chaining ALLOCNO1 onto the head
|
||||
// of ALLOCNO2. This is just a local test of the two allocnos; it doesn't
|
||||
// guarantee that chaining them would give a self-consistent system.
|
||||
// of ALLOCNO2. STRICTNESS says whether we should take copy-elision
|
||||
// heuristics into account, or whether we should just consider things
|
||||
// that matter for correctness.
|
||||
//
|
||||
// This is just a local test of the two allocnos; it doesn't guarantee
|
||||
// that chaining them would give a self-consistent system.
|
||||
bool
|
||||
early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2)
|
||||
early_ra::is_chain_candidate (allocno_info *allocno1, allocno_info *allocno2,
|
||||
test_strictness strictness)
|
||||
{
|
||||
if (allocno2->is_shared ())
|
||||
return false;
|
||||
|
||||
if (allocno1->is_equiv)
|
||||
while (allocno1->is_equiv)
|
||||
allocno1 = m_allocnos[allocno1->related_allocno];
|
||||
|
||||
if (allocno2->start_point >= allocno1->end_point
|
||||
&& !allocno2->is_equiv_to (allocno1->id))
|
||||
return false;
|
||||
|
||||
if (allocno2->is_strong_copy_dest)
|
||||
{
|
||||
if (!allocno1->is_strong_copy_src
|
||||
|| allocno1->copy_dest != allocno2->id)
|
||||
return false;
|
||||
}
|
||||
else if (allocno2->is_copy_dest)
|
||||
if (allocno1->is_earlyclobbered
|
||||
&& allocno1->end_point == allocno2->start_point + 1)
|
||||
return false;
|
||||
|
||||
if (strictness == ALL_REASONS && allocno2->is_copy_dest)
|
||||
{
|
||||
if (allocno1->copy_dest != allocno2->id)
|
||||
return false;
|
||||
}
|
||||
else if (allocno1->is_earlyclobbered)
|
||||
{
|
||||
if (allocno1->end_point == allocno2->start_point + 1)
|
||||
if (allocno2->is_strong_copy_dest && !allocno1->is_strong_copy_src)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2470,8 +2473,7 @@ early_ra::try_to_chain_allocnos (allocno_info *allocno1,
|
|||
auto *head2 = m_allocnos[headi2];
|
||||
if (head1->chain_next != INVALID_ALLOCNO)
|
||||
return false;
|
||||
if (!head2->is_equiv_to (head1->id)
|
||||
&& head1->end_point <= head2->start_point)
|
||||
if (!is_chain_candidate (head1, head2, CORRECTNESS_ONLY))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -2620,7 +2622,7 @@ early_ra::form_chains ()
|
|||
auto *allocno2 = m_sorted_allocnos[sci];
|
||||
if (allocno2->chain_prev == INVALID_ALLOCNO)
|
||||
{
|
||||
if (!is_chain_candidate (allocno1, allocno2))
|
||||
if (!is_chain_candidate (allocno1, allocno2, ALL_REASONS))
|
||||
continue;
|
||||
chain_candidate_info candidate;
|
||||
candidate.allocno = allocno2;
|
||||
|
|
57
gcc/testsuite/gcc.target/aarch64/pr113295-2.c
Normal file
57
gcc/testsuite/gcc.target/aarch64/pr113295-2.c
Normal file
|
@ -0,0 +1,57 @@
|
|||
// { dg-do run }
|
||||
// { dg-options "-O2" }
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
void __attribute__ ((noinline))
|
||||
foo (int8_t **ptr)
|
||||
{
|
||||
int8x16_t v0 = vld1q_s8 (ptr[0]);
|
||||
int8x16_t v1 = vld1q_s8 (ptr[1]);
|
||||
int8x16_t v2 = vld1q_s8 (ptr[2]);
|
||||
int8x16_t v3 = vld1q_s8 (ptr[3]);
|
||||
int8x16_t v4 = vld1q_s8 (ptr[4]);
|
||||
|
||||
int8x16x4_t res0 = { v0, v1, v2, v3 };
|
||||
vst4q_s8 (ptr[5], res0);
|
||||
|
||||
int8x16_t add = vaddq_s8 (v2, v3);
|
||||
int8x16x3_t res1 = { v1, add, v3 };
|
||||
vst3q_s8 (ptr[6], res1);
|
||||
|
||||
int8x16x3_t res2 = { v0, v1, v2 };
|
||||
vst3q_s8 (ptr[7], res2);
|
||||
}
|
||||
|
||||
int8_t arr0[16] = { 1 };
|
||||
int8_t arr1[16] = { 2 };
|
||||
int8_t arr2[16] = { 4 };
|
||||
int8_t arr3[16] = { 8 };
|
||||
int8_t arr4[16] = { 16 };
|
||||
int8_t arr5[16 * 4];
|
||||
int8_t arr6[16 * 3];
|
||||
int8_t arr7[16 * 3];
|
||||
int8_t *ptr[] =
|
||||
{
|
||||
arr0,
|
||||
arr1,
|
||||
arr2,
|
||||
arr3,
|
||||
arr4,
|
||||
arr5,
|
||||
arr6,
|
||||
arr7
|
||||
};
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
foo (ptr);
|
||||
if (arr5[0] != 1 || arr5[1] != 2 || arr5[2] != 4 || arr5[3] != 8)
|
||||
__builtin_abort ();
|
||||
if (arr6[0] != 2 || arr6[1] != 12 || arr6[2] != 8)
|
||||
__builtin_abort ();
|
||||
if (arr7[0] != 1 || arr7[1] != 2 || arr7[2] != 4)
|
||||
__builtin_abort ();
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue