From 2341f675edadd6370147d2bc55ca7761a7ecfaa1 Mon Sep 17 00:00:00 2001
From: "Vladimir N. Makarov"
Date: Tue, 25 Feb 2025 15:01:15 -0500
Subject: [PATCH] [PR115458][LRA]: Run split sub-pass more times

In this PR's case LRA needs to provide too many hard regs for insn
reloads, where some reload pseudos require 8 aligned regs for
themselves.  As the last attempt, LRA tries to split live ranges of
hard regs for insn reload pseudos.  It is a very rare case.  An
inheritance pseudo involving a reload pseudo of the insn can be spilled
in the assignment sub-pass run right after splitting, and we then need
to run the split sub-pass again for the inheritance pseudo.

gcc/ChangeLog:

	PR target/115458
	* lra-int.h (LRA_MAX_FAILED_SPLITS): Define and check its value.
	(lra_split_hard_reg_for): Change prototype.
	* lra.cc (lra): Try to split hard reg range several times after
	a failure.
	* lra-assigns.cc (lra_split_hard_reg_for): Add an arg, a flag of
	giving up.  Report asm error and nullify the asm insn depending
	on the arg value.

gcc/testsuite/ChangeLog:

	PR target/115458
	* g++.target/riscv/pr115458.C: New.
---
 gcc/lra-assigns.cc                        |  50 +--
 gcc/lra-int.h                             |  14 +-
 gcc/lra.cc                                |  14 +-
 gcc/testsuite/g++.target/riscv/pr115458.C | 357 ++++++++++++++++++++++
 4 files changed, 410 insertions(+), 25 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/riscv/pr115458.C

diff --git a/gcc/lra-assigns.cc b/gcc/lra-assigns.cc
index f9e3dfc3d5a..480925ad894 100644
--- a/gcc/lra-assigns.cc
+++ b/gcc/lra-assigns.cc
@@ -1763,12 +1763,13 @@ find_reload_regno_insns (int regno, rtx_insn * &start, rtx_insn * &finish)
   return true;
 }
 
-/* Process reload pseudos which did not get a hard reg, split a hard
-   reg live range in live range of a reload pseudo, and then return
-   TRUE.  If we did not split a hard reg live range, report an error,
-   and return FALSE. */
+/* Process reload pseudos which did not get a hard reg, split a hard reg live
+   range in live range of a reload pseudo, and then return TRUE.  Otherwise,
+   return FALSE.  When FAIL_P is TRUE and if we did not split a hard reg live
+   range for failed reload pseudos, report an error and modify related asm
+   insns.  */
 bool
-lra_split_hard_reg_for (void)
+lra_split_hard_reg_for (bool fail_p)
 {
   int i, regno;
   rtx_insn *insn, *first, *last;
@@ -1843,23 +1844,30 @@ lra_split_hard_reg_for (void)
       regno = u;
       bitmap_ior_into (&failed_reload_insns,
                        &lra_reg_info[regno].insn_bitmap);
-      lra_setup_reg_renumber
-        (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
-    }
-  EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
-    {
-      insn = lra_insn_recog_data[u]->insn;
-      if (asm_noperands (PATTERN (insn)) >= 0)
-        {
-          asm_p = true;
-          lra_asm_insn_error (insn);
-        }
-      else if (!asm_p)
-        {
-          error ("unable to find a register to spill");
-          fatal_insn ("this is the insn:", insn);
-        }
+      if (fail_p)
+        lra_setup_reg_renumber
+          (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
     }
+  if (fail_p)
+    EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
+      {
+        insn = lra_insn_recog_data[u]->insn;
+        if (asm_noperands (PATTERN (insn)) >= 0)
+          {
+            asm_p = true;
+            lra_asm_insn_error (insn);
+            if (JUMP_P (insn))
+              ira_nullify_asm_goto (insn);
+            else
+              PATTERN (insn) = gen_rtx_USE (VOIDmode, const0_rtx);
+            lra_invalidate_insn_data (insn);
+          }
+        else if (!asm_p)
+          {
+            error ("unable to find a register to spill");
+            fatal_insn ("this is the insn:", insn);
+          }
+      }
   bitmap_clear (&failed_reload_pseudos);
   bitmap_clear (&failed_reload_insns);
   return false;
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 1f89e069c4f..ad42f48cc82 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -252,6 +252,18 @@ typedef class lra_insn_recog_data *lra_insn_recog_data_t;
    for preventing LRA cycling in a bug case.  */
 #define LRA_MAX_ASSIGNMENT_ITERATION_NUMBER 30
 
+/* Maximum allowed number of tries to split hard reg live ranges after failure
+   in assignment of reload pseudos.  Theoretical bound for the value is the
+   number of the insn reload pseudos plus the number of inheritance pseudos
+   generated from the reload pseudos.  This bound can be achieved when all the
+   reload pseudos and the inheritance pseudos require hard reg splitting for
+   their assignment.  This is an extremely unlikely event.  */
+#define LRA_MAX_FAILED_SPLITS 10
+
+#if LRA_MAX_FAILED_SPLITS >= LRA_MAX_ASSIGNMENT_ITERATION_NUMBER
+#error wrong LRA_MAX_FAILED_SPLITS value
+#endif
+
 /* The maximal number of inheritance/split passes in LRA.  It should
    be more 1 in order to perform caller saves transformations and
    much less MAX_CONSTRAINT_ITERATION_NUMBER to prevent LRA to do as many
@@ -392,7 +404,7 @@ extern int lra_assignment_iter;
 extern int lra_assignment_iter_after_spill;
 extern void lra_setup_reg_renumber (int, int, bool);
 extern bool lra_assign (bool &);
-extern bool lra_split_hard_reg_for (void);
+extern bool lra_split_hard_reg_for (bool fail_p);
 
 /* lra-coalesce.cc: */
 
diff --git a/gcc/lra.cc b/gcc/lra.cc
index daf9840cc3e..b753729d43d 100644
--- a/gcc/lra.cc
+++ b/gcc/lra.cc
@@ -2480,6 +2480,7 @@ lra (FILE *f, int verbose)
     lra_clear_live_ranges ();
   bool fails_p;
   lra_hard_reg_split_p = false;
+  int split_fails_num = 0;
   do
     {
       /* We need live ranges for lra_assign -- so build them.
@@ -2493,7 +2494,7 @@
         coalescing.  If inheritance pseudos were spilled, the
         memory-memory moves involving them will be removed by pass
         undoing inheritance.  */
-      if (lra_simple_p)
+      if (lra_simple_p || lra_hard_reg_split_p)
        lra_assign (fails_p);
       else
        {
@@ -2522,8 +2523,15 @@
          if (live_p)
            lra_clear_live_ranges ();
          live_p = false;
-         if (! lra_split_hard_reg_for ())
-           break;
+         /* See a comment for LRA_MAX_FAILED_SPLITS definition.  */
+         bool last_failed_split_p
+           = split_fails_num > LRA_MAX_FAILED_SPLITS;
+         if (! lra_split_hard_reg_for (last_failed_split_p))
+           {
+             if (last_failed_split_p)
+               break;
+             split_fails_num++;
+           }
          lra_hard_reg_split_p = true;
        }
     }
diff --git a/gcc/testsuite/g++.target/riscv/pr115458.C b/gcc/testsuite/g++.target/riscv/pr115458.C
new file mode 100644
index 00000000000..2c8d90737e6
--- /dev/null
+++ b/gcc/testsuite/g++.target/riscv/pr115458.C
@@ -0,0 +1,357 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv1p0 -mabi=lp64d -misa-spec=20191213 -mtls-dialect=trad -march=rv64imafdc_v1p0_zmmul_zca_zcd_zve32f_zve32x_zve64d_zve64f_zve64x_zvl128b_zvl32b_zvl64b -O2 -std=c++17 -fno-exceptions -w" } */
+
+typedef signed char int8_t;
+typedef unsigned char uint8_t;
+void Abort(...);
+template struct EnableIfT;
+template <> struct EnableIfT {
+  using type = void;
+};
+template using EnableIf = typename EnableIfT::type;
+template using MakeUnsigned = unsigned char;
+template using MakeSigned = signed char;
+template struct integral_constant {
+  static constexpr int value = __v;
+};
+template using __bool_constant = integral_constant<__v>;
+template using __enable_if_t = _Tp;
+char *TargetName();
+template struct __uniq_ptr_impl {
+  template struct _Ptr {
+    using type = _Up *;
+  };
+  using pointer = typename _Ptr<_Tp>::type;
+};
+template class unique_ptr;
+template struct unique_ptr<_Tp[], _Dp> {
+  template
+  unique_ptr(_Up, __enable_if_t<__bool_constant::value, _Del>);
+  typename __uniq_ptr_impl<_Tp>::pointer get();
+  operator bool();
+};
+using AllocPtr = void *;
+using FreePtr = void();
+template T AllocateAlignedItems(int, AllocPtr, void *);
+struct AlignedFreer {
+  AlignedFreer(FreePtr, void *);
+};
+template using AlignedFreeUniquePtr = unique_ptr;
+AllocPtr AllocateAligned_alloc;
+template
+AlignedFreeUniquePtr AllocateAligned(int items, void *opaque) {
+  FreePtr free;
+  return AlignedFreeUniquePtr(
+      AllocateAlignedItems(items, AllocateAligned_alloc, opaque),
+      AlignedFreer(free, opaque));
+}
+template AlignedFreeUniquePtr AllocateAligned(int items) {
+  return AllocateAligned(items, nullptr);
+}
+template void MakeTypeInfo();
+void AssertArrayEqual(void *, void *, char *, int);
+#pragma riscv intrinsic "vector"
+template struct Simd {
+  using T = Lane;
+  constexpr int Pow2() { return kPow2; }
+  template static constexpr int RebindPow2() { return kPow2; }
+  template using Rebind = Simd()>;
+};
+template struct ClampNAndPow2 {
+  using type = Simd;
+};
+template struct ScalableTagChecker {
+  using type = typename ClampNAndPow2::type;
+};
+template
+using ScalableTag = typename ScalableTagChecker::type;
+template using TFromD = typename D::T;
+template using Rebind = typename D::Rebind;
+template using RebindToSigned = Rebind, D>;
+template using RebindToUnsigned = Rebind, D>;
+template struct DFromV_t;
+template using DFromV = typename DFromV_t::type;
+template <> struct DFromV_t {
+  using Lane = int8_t;
+  using type = ScalableTag;
+};
+template <> struct DFromV_t {
+  using Lane = int8_t;
+  using type = ScalableTag;
+};
+template <> struct DFromV_t {
+  using Lane = int8_t;
+  using type = ScalableTag;
+};
+template <> struct DFromV_t {
+  using Lane = int8_t;
+  using type = ScalableTag;
+};
+template <> struct DFromV_t {
+  using Lane = int8_t;
+  using type = ScalableTag;
+};
+template <> struct DFromV_t {
+  using Lane = int8_t;
+  using type = ScalableTag;
+};
+template <> struct DFromV_t {
+  using Lane = int8_t;
+  using type = ScalableTag;
+};
+template int Lanes(Simd);
+template int Lanes(Simd);
+template int Lanes(Simd);
+template int Lanes(Simd);
+template int Lanes(Simd);
+template int Lanes(Simd);
+template int Lanes(Simd);
+template vuint8mf8_t Set(Simd, uint8_t);
+template vuint8mf4_t Set(Simd, uint8_t);
+template vuint8mf2_t Set(Simd, uint8_t);
+template vuint8m1_t Set(Simd, uint8_t);
+template vuint8m2_t Set(Simd, uint8_t);
+template vuint8m4_t Set(Simd, uint8_t);
+template vuint8m8_t Set(Simd, uint8_t arg) {
+  return __riscv_vmv_v_x_u8m8(arg, 0);
+}
+template vint8mf8_t Set(Simd, int8_t);
+template vint8mf4_t Set(Simd, int8_t);
+template vint8mf2_t Set(Simd, int8_t);
+template vint8m1_t Set(Simd, int8_t);
+template vint8m2_t Set(Simd, int8_t);
+template vint8m4_t Set(Simd, int8_t);
+template vint8m8_t Set(Simd, int8_t);
+template using VFromD = decltype(Set(D(), TFromD()));
+template VFromD Zero(D d) {
+  RebindToUnsigned du;
+  return BitCast(d, Set(du, 0));
+}
+template
+vuint8mf8_t BitCastToByte(Simd, vuint8mf8_t);
+template
+vuint8mf4_t BitCastToByte(Simd, vuint8mf4_t);
+template
+vuint8mf2_t BitCastToByte(Simd, vuint8mf2_t);
+template
+vuint8m1_t BitCastToByte(Simd, vuint8m1_t);
+template
+vuint8m2_t BitCastToByte(Simd, vuint8m2_t);
+template
+vuint8m4_t BitCastToByte(Simd, vuint8m4_t);
+template
+vuint8m8_t BitCastToByte(Simd, vuint8m8_t v) {
+  return v;
+}
+template
+vuint8mf8_t BitCastToByte(Simd, vint8mf8_t);
+template vint8mf8_t BitCastFromByte(Simd, vuint8mf8_t);
+template
+vuint8mf4_t BitCastToByte(Simd, vint8mf4_t);
+template vint8mf4_t BitCastFromByte(Simd, vuint8mf4_t);
+template
+vuint8mf2_t BitCastToByte(Simd, vint8mf2_t);
+template vint8mf2_t BitCastFromByte(Simd, vuint8mf2_t);
+template vuint8m1_t BitCastToByte(Simd, vint8m1_t);
+template vint8m1_t BitCastFromByte(Simd, vuint8m1_t);
+template vuint8m2_t BitCastToByte(Simd, vint8m2_t);
+template vint8m2_t BitCastFromByte(Simd, vuint8m2_t);
+template vuint8m4_t BitCastToByte(Simd, vint8m4_t);
+template vint8m4_t BitCastFromByte(Simd, vuint8m4_t);
+template vuint8m8_t BitCastToByte(Simd, vint8m8_t);
+template vint8m8_t BitCastFromByte(Simd, vuint8m8_t v) {
+  return __riscv_vreinterpret_v_u8m8_i8m8(v);
+}
+template VFromD BitCast(D d, FromV v) {
+  return BitCastFromByte(d, BitCastToByte(d, v));
+}
+vint8mf8_t And(vint8mf8_t, vint8mf8_t);
+vint8mf4_t And(vint8mf4_t, vint8mf4_t);
+vint8mf2_t And(vint8mf2_t, vint8mf2_t);
+vint8m1_t And(vint8m1_t, vint8m1_t);
+vint8m2_t And(vint8m2_t, vint8m2_t);
+vint8m4_t And(vint8m4_t, vint8m4_t);
+vint8m8_t And(vint8m8_t, vint8m8_t);
+vint8mf8_t Xor(vint8mf8_t, vint8mf8_t);
+vint8mf4_t Xor(vint8mf4_t, vint8mf4_t);
+vint8mf2_t Xor(vint8mf2_t, vint8mf2_t);
+vint8m1_t Xor(vint8m1_t, vint8m1_t);
+vint8m2_t Xor(vint8m2_t, vint8m2_t);
+vint8m4_t Xor(vint8m4_t, vint8m4_t);
+vint8m8_t Xor(vint8m8_t, vint8m8_t);
+template V AndNot(V);
+template V Xor3(V);
+template V Neg(V);
+template vuint8mf8_t ShiftLeft(vuint8mf8_t);
+template vuint8mf4_t ShiftLeft(vuint8mf4_t);
+template vuint8mf2_t ShiftLeft(vuint8mf2_t);
+template vuint8m1_t ShiftLeft(vuint8m1_t);
+template vuint8m2_t ShiftLeft(vuint8m2_t);
+template vuint8m4_t ShiftLeft(vuint8m4_t);
+template vuint8m8_t ShiftLeft(vuint8m8_t);
+vint8mf8_t MaskedSubOr(vint8mf8_t, vbool64_t, vint8mf8_t, vint8mf8_t);
+vint8mf4_t MaskedSubOr(vint8mf4_t, vbool32_t, vint8mf4_t, vint8mf4_t);
+vint8mf2_t MaskedSubOr(vint8mf2_t, vbool16_t, vint8mf2_t, vint8mf2_t);
+vint8m1_t MaskedSubOr(vint8m1_t, vbool8_t, vint8m1_t, vint8m1_t);
+vint8m2_t MaskedSubOr(vint8m2_t, vbool4_t, vint8m2_t, vint8m2_t);
+vint8m4_t MaskedSubOr(vint8m4_t, vbool2_t, vint8m4_t,
+                      vint8m4_t);
+vint8m8_t MaskedSubOr(vint8m8_t no, vbool1_t m, vint8m8_t a, vint8m8_t b) {
+  return __riscv_vsub_vv_i8m8_mu(m, no, a, b, 0);
+}
+vbool64_t Lt(vint8mf8_t, vint8mf8_t);
+vbool32_t Lt(vint8mf4_t, vint8mf4_t);
+vbool16_t Lt(vint8mf2_t, vint8mf2_t);
+vbool8_t Lt(vint8m1_t, vint8m1_t);
+vbool4_t Lt(vint8m2_t, vint8m2_t);
+vbool2_t Lt(vint8m4_t, vint8m4_t);
+vbool1_t Lt(vint8m8_t a, vint8m8_t b) {
+  return __riscv_vmslt_vv_i8m8_b1(a, b, 0);
+}
+template V BroadcastSignBit(V);
+template V IfNegativeThenElse(V);
+template void Store(vint8mf8_t, Simd, int8_t *);
+template void Store(vint8mf4_t, Simd, int8_t *);
+template void Store(vint8mf2_t, Simd, int8_t *);
+template void Store(vint8m1_t, Simd, int8_t *);
+template void Store(vint8m2_t, Simd, int8_t *);
+template void Store(vint8m4_t, Simd, int8_t *);
+template void Store(vint8m8_t, Simd, int8_t *);
+template * = nullptr>
+V InterleaveUpperBlocks(D, V, V) {}
+template 2)> * = nullptr>
+V InterleaveUpperBlocks(D, V, V);
+template
+constexpr bool IsGE128(Simd) {
+  return kPow2 >= 0;
+}
+template * = nullptr>
+V InterleaveLower(D, V, V);
+template * = nullptr>
+V InterleaveLower(D, V, V);
+template * = nullptr>
+V InterleaveUpper(D d, V a, V b) {
+  return InterleaveUpperBlocks(d, a, b);
+}
+template * = nullptr>
+V InterleaveUpper(D, V, V);
+template VFromD Iota(D, T2);
+template using Vec = decltype(Zero(D()));
+template Vec SignBit(D);
+template V IfNegativeThenElseZero(V);
+template V IfNegativeThenZeroElse(V);
+template V BitwiseIfThenElse(V, V, V);
+template inline V IfNegativeThenNegOrUndefIfZero(V mask, V v) {
+  auto zero = Zero(DFromV());
+  return MaskedSubOr(v, Lt(mask, zero), zero, v);
+}
+template Vec PositiveIota(D);
+int AssertVecEqual_line;
+template >
+inline void AssertVecEqual(D d, Vec expected, Vec actual, char *) {
+  int N = Lanes(d);
+  auto expected_lanes = AllocateAligned(N),
+       actual_lanes = AllocateAligned(N);
+  if (expected_lanes && actual_lanes)
+    Abort("", "");
+  Store(expected, d, expected_lanes.get());
+  Store(actual, d, actual_lanes.get());
+  MakeTypeInfo();
+  char *target_name = TargetName();
+  AssertArrayEqual(expected_lanes.get(), actual_lanes.get(), target_name,
+                   AssertVecEqual_line);
+}
+template constexpr int MinPow2() {
+  return sizeof(int) ? -3 : 0;
+}
+template
+struct ForeachPow2 {
+  static void Do(int min_lanes) {
+    ScalableTag d;
+    Lanes(d);
+    Test()(T(), d);
+    ForeachPow2::Do(
+        min_lanes);
+  }
+};
+template
+struct ForeachPow2 {
+  static void Do(int);
+};
+template
+using ForeachPow2Trim =
+    ForeachPow2(), 3, kAddMin <= kSubMax, Test>;
+template struct ForExtendableVectors {
+  template void operator()(T) {
+    ForeachPow2Trim::Do(1);
+  }
+};
+template struct ForPartialVectors {
+  template void operator()(T t) {
+    ForExtendableVectors()(t);
+  }
+};
+template void ForSignedTypes(Func func) { func(int8_t()); }
+struct TestIfNegative {
+  template void operator()(T, D d) {
+    auto vp = Iota(d, 1), vsignbit = SignBit(d);
+    RebindToSigned di;
+    RebindToUnsigned du;
+    BitCast(d, ShiftLeft)>(Iota(du, 1)));
+    auto m1 = Xor3(BitCast(d, Set(du, {})));
+    auto x1 = Xor(vp, BitCast(d, Set(d, {})));
+    auto x2 = Xor(vp, BitCast(d, Set(d, {})));
+    Xor(m1, vsignbit);
+    auto m1_s = BitCast(d, BroadcastSignBit(BitCast(di, m1)));
+    auto expected_2 = BitwiseIfThenElse(m1_s, x2, x1);
+    AssertVecEqual(d, expected_2, IfNegativeThenElse(x2), "");
+    auto expected_3 = And(m1_s, x1);
+    auto expected_4 = AndNot(x2);
+    AssertVecEqual(d, expected_3, IfNegativeThenElseZero(x1), "");
+    AssertVecEqual(d, expected_3, IfNegativeThenZeroElse(x1), "");
+    AssertVecEqual(d, expected_4, IfNegativeThenZeroElse(x2), "");
+    AssertVecEqual(d, expected_4, IfNegativeThenElseZero(x2), "");
+  }
+};
+void TestAllIfNegative() {
+  ForSignedTypes(ForPartialVectors());
+}
+template
+void TestMoreThan1LaneIfNegativeThenNegOrUndefIfZero(D d, Vec v1) {
+  Vec v2, v3 = InterleaveLower(d, v1, v1), v5 = InterleaveLower(d, v1, v2);
+  if (Lanes(d) < 2)
+    return;
+  Vec v4 = InterleaveUpper(d, v1, v1);
+  Vec v6 = InterleaveUpper(d, v1, v2);
+  Vec v7 = InterleaveLower(d, v2, v1);
+  Vec v8 = InterleaveUpper(d, v2, v1);
+  AssertVecEqual(d, v3, IfNegativeThenNegOrUndefIfZero(v3, v3), "");
+  AssertVecEqual(d, v4, IfNegativeThenNegOrUndefIfZero(v4, v4), "");
+  AssertVecEqual(d, v4, IfNegativeThenNegOrUndefIfZero(v8, v8), "");
+  AssertVecEqual(d, v6, IfNegativeThenNegOrUndefIfZero(v4, v6), "");
+  AssertVecEqual(d, v7, IfNegativeThenNegOrUndefIfZero(v3, v7), "");
+  AssertVecEqual(d, v8, IfNegativeThenNegOrUndefIfZero(v4, v8), "");
+  Vec zero = Zero(d);
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v3, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v4, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v5, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v6, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v7, zero), "");
+  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v8, zero), "");
+}
+struct TestIfNegativeThenNegOrUndefIfZero {
+  template void operator()(T, D d) {
+    auto v1 = PositiveIota(d), v2 = Neg(v1), zero = Zero(d), vmin = Set(d, 0),
+         vmax = Set(d, 0);
+    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(v1, v2), "");
+    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(v2, v1), "");
+    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(v2, v2), "");
+    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(zero, zero), "");
+    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v1, zero), "");
+    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v2, zero), "");
+    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(vmin, v2), "");
+    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(vmax, v1), "");
+    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(vmax, v2), "");
+    TestMoreThan1LaneIfNegativeThenNegOrUndefIfZero(d, v1);
+  }
+};
+void TestAllIfNegativeThenNegOrUndefIfZero() {
+  ForSignedTypes(ForPartialVectors());
+}
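
Note on the retry logic in the lra.cc hunk: the following is a minimal standalone
sketch (not GCC code) of the bounded-retry pattern the patch introduces.  The
hard-reg-split fallback is retried after each failed assignment round, and it is
only asked to give up and diagnose (fail_p == true) once the budget set by
LRA_MAX_FAILED_SPLITS is exhausted.  The names try_assign, try_split_hard_reg
and MAX_FAILED_SPLITS below are hypothetical stand-ins for lra_assign,
lra_split_hard_reg_for and LRA_MAX_FAILED_SPLITS.

/* Standalone sketch of the bounded-retry pattern; compile with any C++17
   compiler.  Hypothetical stand-ins, not GCC internals.  */
#include <cstdio>

constexpr int MAX_FAILED_SPLITS = 10;	/* analogue of LRA_MAX_FAILED_SPLITS */

/* Pretend assignment succeeds only on the fourth round.  */
static bool try_assign (int round) { return round >= 3; }

/* Pretend the split fallback never manages to free a hard reg.  */
static bool try_split_hard_reg (bool fail_p) { (void) fail_p; return false; }

int
main ()
{
  int split_fails_num = 0;
  for (int round = 0; ; round++)
    {
      if (try_assign (round))
	{
	  std::printf ("assignment succeeded on round %d\n", round);
	  break;
	}
      /* Assignment failed: fall back to splitting hard reg live ranges.
	 Only after the retry budget is exhausted is the fallback asked to
	 give up and report an error (fail_p == true), mirroring
	 last_failed_split_p in lra ().  */
      bool last_failed_split_p = split_fails_num > MAX_FAILED_SPLITS;
      if (!try_split_hard_reg (last_failed_split_p))
	{
	  if (last_failed_split_p)
	    {
	      std::printf ("giving up after %d failed splits\n",
			   split_fails_num);
	      break;
	    }
	  split_fails_num++;
	}
    }
  return 0;
}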