[PR115458][LRA]: Run split sub-pass more times
In this PR's case, LRA needs to provide too many hard regs for insn reloads, where
some reload pseudos require 8 aligned regs for themselves.  As a last resort, LRA
tries to split live ranges of hard regs for the insn reload pseudos.  This is a
very rare case.  An inheritance pseudo involving a reload pseudo of the insn can
be spilled in the assignment sub-pass run right after the splitting, and then the
split sub-pass needs to be run again for the inheritance pseudo.

gcc/ChangeLog:

	PR target/115458
	* lra-int.h (LRA_MAX_FAILED_SPLITS): Define and check its value.
	(lra_split_hard_reg_for): Change prototype.
	* lra.cc (lra): Try to split hard reg range several times after a
	failure.
	* lra-assigns.cc (lra_split_hard_reg_for): Add an arg, a flag of
	giving up.  Report asm error and nullify the asm insn depending
	on the arg value.

gcc/testsuite/ChangeLog:

	PR target/115458
	* g++.target/riscv/pr115458.C: New.
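Before reading the diffs, a minimal standalone sketch of the retry logic this
patch adds to the lra () driver loop may help.  The try_assign and try_split
stubs below are hypothetical stand-ins for lra_assign and
lra_split_hard_reg_for; this only illustrates the bounded-retry control flow,
it is not GCC code.

    // Sketch of the bounded retry of the split sub-pass (assumption:
    // try_assign/try_split stand in for lra_assign/lra_split_hard_reg_for).
    #include <cstdio>

    static const int LRA_MAX_FAILED_SPLITS = 10;  // bound from lra-int.h

    // Stub: pretend assignment keeps failing, so the retry path is taken.
    static bool try_assign () { return false; }

    // Stub: pretend splitting keeps failing too.  With fail_p set, the real
    // lra_split_hard_reg_for also reports errors and nullifies offending asm
    // insns before returning false.
    static bool try_split (bool fail_p) { (void) fail_p; return false; }

    int main ()
    {
      int split_fails_num = 0;
      for (;;)
	{
	  if (try_assign ())
	    break;		// every reload pseudo got a hard reg
	  // Report errors only once the split sub-pass has already been
	  // retried more than LRA_MAX_FAILED_SPLITS times.
	  bool last_failed_split_p = split_fails_num > LRA_MAX_FAILED_SPLITS;
	  if (!try_split (last_failed_split_p))
	    {
	      if (last_failed_split_p)
		break;		// errors were reported; give up
	      split_fails_num++; // count the failure and retry the sub-pass
	    }
	}
      std::printf ("gave up after %d failed splits\n", split_fails_num);
      return 0;
    }

The point of the new flag is that error reporting and asm nullification now
happen only on the final attempt, after the retry budget is exhausted.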
gcc/lra-assigns.cc
@@ -1763,12 +1763,13 @@ find_reload_regno_insns (int regno, rtx_insn * &start, rtx_insn * &finish)
   return true;
 }
 
-/* Process reload pseudos which did not get a hard reg, split a hard
-   reg live range in live range of a reload pseudo, and then return
-   TRUE.  If we did not split a hard reg live range, report an error,
-   and return FALSE.  */
+/* Process reload pseudos which did not get a hard reg, split a hard reg live
+   range in live range of a reload pseudo, and then return TRUE.  Otherwise,
+   return FALSE.  When FAIL_P is TRUE and if we did not split a hard reg live
+   range for failed reload pseudos, report an error and modify related asm
+   insns.  */
 bool
-lra_split_hard_reg_for (void)
+lra_split_hard_reg_for (bool fail_p)
 {
   int i, regno;
   rtx_insn *insn, *first, *last;
@@ -1843,23 +1844,30 @@ lra_split_hard_reg_for (void)
 	  regno = u;
 	  bitmap_ior_into (&failed_reload_insns,
 			   &lra_reg_info[regno].insn_bitmap);
-	  lra_setup_reg_renumber
-	    (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
-	}
-      EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
-	{
-	  insn = lra_insn_recog_data[u]->insn;
-	  if (asm_noperands (PATTERN (insn)) >= 0)
-	    {
-	      asm_p = true;
-	      lra_asm_insn_error (insn);
-	    }
-	  else if (!asm_p)
-	    {
-	      error ("unable to find a register to spill");
-	      fatal_insn ("this is the insn:", insn);
-	    }
-	}
+	  if (fail_p)
+	    lra_setup_reg_renumber
+	      (regno, ira_class_hard_regs[lra_get_allocno_class (regno)][0], false);
+	}
+      if (fail_p)
+	EXECUTE_IF_SET_IN_BITMAP (&failed_reload_insns, 0, u, bi)
+	  {
+	    insn = lra_insn_recog_data[u]->insn;
+	    if (asm_noperands (PATTERN (insn)) >= 0)
+	      {
+		asm_p = true;
+		lra_asm_insn_error (insn);
+		if (JUMP_P (insn))
+		  ira_nullify_asm_goto (insn);
+		else
+		  PATTERN (insn) = gen_rtx_USE (VOIDmode, const0_rtx);
+		lra_invalidate_insn_data (insn);
+	      }
+	    else if (!asm_p)
+	      {
+		error ("unable to find a register to spill");
+		fatal_insn ("this is the insn:", insn);
+	      }
+	  }
       bitmap_clear (&failed_reload_pseudos);
       bitmap_clear (&failed_reload_insns);
       return false;
gcc/lra-int.h
@@ -252,6 +252,18 @@ typedef class lra_insn_recog_data *lra_insn_recog_data_t;
    for preventing LRA cycling in a bug case.  */
 #define LRA_MAX_ASSIGNMENT_ITERATION_NUMBER 30
 
+/* Maximum allowed number of tries to split hard reg live ranges after failure
+   in assignment of reload pseudos.  Theoretical bound for the value is the
+   number of the insn reload pseudos plus the number of inheritance pseudos
+   generated from the reload pseudos.  This bound can be achieved when all the
+   reload pseudos and the inheritance pseudos require hard reg splitting for
+   their assignment.  This is an extremely unlikely event.  */
+#define LRA_MAX_FAILED_SPLITS 10
+
+#if LRA_MAX_FAILED_SPLITS >= LRA_MAX_ASSIGNMENT_ITERATION_NUMBER
+#error wrong LRA_MAX_FAILED_SPLITS value
+#endif
+
 /* The maximal number of inheritance/split passes in LRA.  It should
    be more 1 in order to perform caller saves transformations and much
    less MAX_CONSTRAINT_ITERATION_NUMBER to prevent LRA to do as many
@@ -392,7 +404,7 @@ extern int lra_assignment_iter;
 extern int lra_assignment_iter_after_spill;
 extern void lra_setup_reg_renumber (int, int, bool);
 extern bool lra_assign (bool &);
-extern bool lra_split_hard_reg_for (void);
+extern bool lra_split_hard_reg_for (bool fail_p);
 
 /* lra-coalesce.cc:  */
 
gcc/lra.cc
@@ -2480,6 +2480,7 @@ lra (FILE *f, int verbose)
       lra_clear_live_ranges ();
       bool fails_p;
       lra_hard_reg_split_p = false;
+      int split_fails_num = 0;
       do
 	{
 	  /* We need live ranges for lra_assign -- so build them.
@@ -2493,7 +2494,7 @@ lra (FILE *f, int verbose)
 	     coalescing.  If inheritance pseudos were spilled, the
 	     memory-memory moves involving them will be removed by
 	     pass undoing inheritance.  */
-	  if (lra_simple_p)
+	  if (lra_simple_p || lra_hard_reg_split_p)
 	    lra_assign (fails_p);
 	  else
 	    {
@@ -2522,8 +2523,15 @@ lra (FILE *f, int verbose)
 	      if (live_p)
 		lra_clear_live_ranges ();
 	      live_p = false;
-	      if (! lra_split_hard_reg_for ())
-		break;
+	      /* See a comment for LRA_MAX_FAILED_SPLITS definition.  */
+	      bool last_failed_split_p
+		= split_fails_num > LRA_MAX_FAILED_SPLITS;
+	      if (! lra_split_hard_reg_for (last_failed_split_p))
+		{
+		  if (last_failed_split_p)
+		    break;
+		  split_fails_num++;
+		}
 	      lra_hard_reg_split_p = true;
 	    }
 	}
gcc/testsuite/g++.target/riscv/pr115458.C (new file)
@@ -0,0 +1,357 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv1p0 -mabi=lp64d -misa-spec=20191213 -mtls-dialect=trad -march=rv64imafdc_v1p0_zmmul_zca_zcd_zve32f_zve32x_zve64d_zve64f_zve64x_zvl128b_zvl32b_zvl64b -O2 -std=c++17 -fno-exceptions -w" } */

typedef signed char int8_t;
typedef unsigned char uint8_t;
void Abort(...);
template <bool> struct EnableIfT;
template <> struct EnableIfT<true> {
  using type = void;
};
template <bool Condition> using EnableIf = typename EnableIfT<Condition>::type;
template <typename> using MakeUnsigned = unsigned char;
template <typename> using MakeSigned = signed char;
template <int __v> struct integral_constant {
  static constexpr int value = __v;
};
template <bool __v> using __bool_constant = integral_constant<__v>;
template <bool, typename _Tp> using __enable_if_t = _Tp;
char *TargetName();
template <typename _Tp> struct __uniq_ptr_impl {
  template <typename _Up> struct _Ptr {
    using type = _Up *;
  };
  using pointer = typename _Ptr<_Tp>::type;
};
template <typename _Tp, typename = _Tp> class unique_ptr;
template <typename _Tp, typename _Dp> struct unique_ptr<_Tp[], _Dp> {
  template <typename _Up, typename _Del>
  unique_ptr(_Up, __enable_if_t<__bool_constant<false>::value, _Del>);
  typename __uniq_ptr_impl<_Tp>::pointer get();
  operator bool();
};
using AllocPtr = void *;
using FreePtr = void();
template <typename T> T AllocateAlignedItems(int, AllocPtr, void *);
struct AlignedFreer {
  AlignedFreer(FreePtr, void *);
};
template <typename T> using AlignedFreeUniquePtr = unique_ptr<T>;
AllocPtr AllocateAligned_alloc;
template <typename T>
AlignedFreeUniquePtr<T[]> AllocateAligned(int items, void *opaque) {
  FreePtr free;
  return AlignedFreeUniquePtr<T[]>(
      AllocateAlignedItems<T>(items, AllocateAligned_alloc, opaque),
      AlignedFreer(free, opaque));
}
template <typename T> AlignedFreeUniquePtr<T[]> AllocateAligned(int items) {
  return AllocateAligned<T>(items, nullptr);
}
template <typename> void MakeTypeInfo();
void AssertArrayEqual(void *, void *, char *, int);
#pragma riscv intrinsic "vector"
template <typename Lane, int, int kPow2> struct Simd {
  using T = Lane;
  constexpr int Pow2() { return kPow2; }
  template <typename> static constexpr int RebindPow2() { return kPow2; }
  template <typename NewT> using Rebind = Simd<NewT, 0, RebindPow2<NewT>()>;
};
template <typename T, int kPow2> struct ClampNAndPow2 {
  using type = Simd<T, 6, kPow2>;
};
template <typename T, int kPow2> struct ScalableTagChecker {
  using type = typename ClampNAndPow2<T, kPow2>::type;
};
template <typename T, int kPow2>
using ScalableTag = typename ScalableTagChecker<T, kPow2>::type;
template <class D> using TFromD = typename D::T;
template <class T, class D> using Rebind = typename D::Rebind<T>;
template <class D> using RebindToSigned = Rebind<MakeSigned<D>, D>;
template <class D> using RebindToUnsigned = Rebind<MakeUnsigned<D>, D>;
template <class> struct DFromV_t;
template <class V> using DFromV = typename DFromV_t<V>::type;
template <> struct DFromV_t<vint8mf8_t> {
  using Lane = int8_t;
  using type = ScalableTag<Lane, -3>;
};
template <> struct DFromV_t<vint8mf4_t> {
  using Lane = int8_t;
  using type = ScalableTag<Lane, -2>;
};
template <> struct DFromV_t<vint8mf2_t> {
  using Lane = int8_t;
  using type = ScalableTag<Lane, -1>;
};
template <> struct DFromV_t<vint8m1_t> {
  using Lane = int8_t;
  using type = ScalableTag<Lane, 0>;
};
template <> struct DFromV_t<vint8m2_t> {
  using Lane = int8_t;
  using type = ScalableTag<Lane, 1>;
};
template <> struct DFromV_t<vint8m4_t> {
  using Lane = int8_t;
  using type = ScalableTag<Lane, 2>;
};
template <> struct DFromV_t<vint8m8_t> {
  using Lane = int8_t;
  using type = ScalableTag<Lane, 3>;
};
template <int N> int Lanes(Simd<int8_t, N, -3>);
template <int N> int Lanes(Simd<int8_t, N, -2>);
template <int N> int Lanes(Simd<int8_t, N, -1>);
template <int N> int Lanes(Simd<int8_t, N, 0>);
template <int N> int Lanes(Simd<int8_t, N, 1>);
template <int N> int Lanes(Simd<int8_t, N, 2>);
template <int N> int Lanes(Simd<int8_t, N, 3>);
template <int N> vuint8mf8_t Set(Simd<uint8_t, N, -3>, uint8_t);
template <int N> vuint8mf4_t Set(Simd<uint8_t, N, -2>, uint8_t);
template <int N> vuint8mf2_t Set(Simd<uint8_t, N, -1>, uint8_t);
template <int N> vuint8m1_t Set(Simd<uint8_t, N, 0>, uint8_t);
template <int N> vuint8m2_t Set(Simd<uint8_t, N, 1>, uint8_t);
template <int N> vuint8m4_t Set(Simd<uint8_t, N, 2>, uint8_t);
template <int N> vuint8m8_t Set(Simd<uint8_t, N, 3>, uint8_t arg) {
  return __riscv_vmv_v_x_u8m8(arg, 0);
}
template <int N> vint8mf8_t Set(Simd<int8_t, N, -3>, int8_t);
template <int N> vint8mf4_t Set(Simd<int8_t, N, -2>, int8_t);
template <int N> vint8mf2_t Set(Simd<int8_t, N, -1>, int8_t);
template <int N> vint8m1_t Set(Simd<int8_t, N, 0>, int8_t);
template <int N> vint8m2_t Set(Simd<int8_t, N, 1>, int8_t);
template <int N> vint8m4_t Set(Simd<int8_t, N, 2>, int8_t);
template <int N> vint8m8_t Set(Simd<int8_t, N, 3>, int8_t);
template <class D> using VFromD = decltype(Set(D(), TFromD<D>()));
template <class D> VFromD<D> Zero(D d) {
  RebindToUnsigned<decltype(d)> du;
  return BitCast(d, Set(du, 0));
}
template <typename T, int N>
vuint8mf8_t BitCastToByte(Simd<T, N, -3>, vuint8mf8_t);
template <typename T, int N>
vuint8mf4_t BitCastToByte(Simd<T, N, -2>, vuint8mf4_t);
template <typename T, int N>
vuint8mf2_t BitCastToByte(Simd<T, N, -1>, vuint8mf2_t);
template <typename T, int N>
vuint8m1_t BitCastToByte(Simd<T, N, 0>, vuint8m1_t);
template <typename T, int N>
vuint8m2_t BitCastToByte(Simd<T, N, 1>, vuint8m2_t);
template <typename T, int N>
vuint8m4_t BitCastToByte(Simd<T, N, 2>, vuint8m4_t);
template <typename T, int N>
vuint8m8_t BitCastToByte(Simd<T, N, 3>, vuint8m8_t v) {
  return v;
}
template <typename T, int N>
vuint8mf8_t BitCastToByte(Simd<T, N, -3>, vint8mf8_t);
template <int N> vint8mf8_t BitCastFromByte(Simd<int8_t, N, -3>, vuint8mf8_t);
template <typename T, int N>
vuint8mf4_t BitCastToByte(Simd<T, N, -2>, vint8mf4_t);
template <int N> vint8mf4_t BitCastFromByte(Simd<int8_t, N, -2>, vuint8mf4_t);
template <typename T, int N>
vuint8mf2_t BitCastToByte(Simd<T, N, -1>, vint8mf2_t);
template <int N> vint8mf2_t BitCastFromByte(Simd<int8_t, N, -1>, vuint8mf2_t);
template <typename T, int N> vuint8m1_t BitCastToByte(Simd<T, N, 0>, vint8m1_t);
template <int N> vint8m1_t BitCastFromByte(Simd<int8_t, N, 0>, vuint8m1_t);
template <typename T, int N> vuint8m2_t BitCastToByte(Simd<T, N, 1>, vint8m2_t);
template <int N> vint8m2_t BitCastFromByte(Simd<int8_t, N, 1>, vuint8m2_t);
template <typename T, int N> vuint8m4_t BitCastToByte(Simd<T, N, 2>, vint8m4_t);
template <int N> vint8m4_t BitCastFromByte(Simd<int8_t, N, 2>, vuint8m4_t);
template <typename T, int N> vuint8m8_t BitCastToByte(Simd<T, N, 3>, vint8m8_t);
template <int N> vint8m8_t BitCastFromByte(Simd<int8_t, N, 3>, vuint8m8_t v) {
  return __riscv_vreinterpret_v_u8m8_i8m8(v);
}
template <class D, class FromV> VFromD<D> BitCast(D d, FromV v) {
  return BitCastFromByte(d, BitCastToByte(d, v));
}
vint8mf8_t And(vint8mf8_t, vint8mf8_t);
vint8mf4_t And(vint8mf4_t, vint8mf4_t);
vint8mf2_t And(vint8mf2_t, vint8mf2_t);
vint8m1_t And(vint8m1_t, vint8m1_t);
vint8m2_t And(vint8m2_t, vint8m2_t);
vint8m4_t And(vint8m4_t, vint8m4_t);
vint8m8_t And(vint8m8_t, vint8m8_t);
vint8mf8_t Xor(vint8mf8_t, vint8mf8_t);
vint8mf4_t Xor(vint8mf4_t, vint8mf4_t);
vint8mf2_t Xor(vint8mf2_t, vint8mf2_t);
vint8m1_t Xor(vint8m1_t, vint8m1_t);
vint8m2_t Xor(vint8m2_t, vint8m2_t);
vint8m4_t Xor(vint8m4_t, vint8m4_t);
vint8m8_t Xor(vint8m8_t, vint8m8_t);
template <class V> V AndNot(V);
template <class V> V Xor3(V);
template <class V> V Neg(V);
template <int> vuint8mf8_t ShiftLeft(vuint8mf8_t);
template <int> vuint8mf4_t ShiftLeft(vuint8mf4_t);
template <int> vuint8mf2_t ShiftLeft(vuint8mf2_t);
template <int> vuint8m1_t ShiftLeft(vuint8m1_t);
template <int> vuint8m2_t ShiftLeft(vuint8m2_t);
template <int> vuint8m4_t ShiftLeft(vuint8m4_t);
template <int> vuint8m8_t ShiftLeft(vuint8m8_t);
vint8mf8_t MaskedSubOr(vint8mf8_t, vbool64_t, vint8mf8_t, vint8mf8_t);
vint8mf4_t MaskedSubOr(vint8mf4_t, vbool32_t, vint8mf4_t, vint8mf4_t);
vint8mf2_t MaskedSubOr(vint8mf2_t, vbool16_t, vint8mf2_t, vint8mf2_t);
vint8m1_t MaskedSubOr(vint8m1_t, vbool8_t, vint8m1_t, vint8m1_t);
vint8m2_t MaskedSubOr(vint8m2_t, vbool4_t, vint8m2_t, vint8m2_t);
vint8m4_t MaskedSubOr(vint8m4_t, vbool2_t, vint8m4_t, vint8m4_t);
vint8m8_t MaskedSubOr(vint8m8_t no, vbool1_t m, vint8m8_t a, vint8m8_t b) {
  return __riscv_vsub_vv_i8m8_mu(m, no, a, b, 0);
}
vbool64_t Lt(vint8mf8_t, vint8mf8_t);
vbool32_t Lt(vint8mf4_t, vint8mf4_t);
vbool16_t Lt(vint8mf2_t, vint8mf2_t);
vbool8_t Lt(vint8m1_t, vint8m1_t);
vbool4_t Lt(vint8m2_t, vint8m2_t);
vbool2_t Lt(vint8m4_t, vint8m4_t);
vbool1_t Lt(vint8m8_t a, vint8m8_t b) {
  return __riscv_vmslt_vv_i8m8_b1(a, b, 0);
}
template <class V> V BroadcastSignBit(V);
template <class V> V IfNegativeThenElse(V);
template <int N> void Store(vint8mf8_t, Simd<int8_t, N, -3>, int8_t *);
template <int N> void Store(vint8mf4_t, Simd<int8_t, N, -2>, int8_t *);
template <int N> void Store(vint8mf2_t, Simd<int8_t, N, -1>, int8_t *);
template <int N> void Store(vint8m1_t, Simd<int8_t, N, 0>, int8_t *);
template <int N> void Store(vint8m2_t, Simd<int8_t, N, 1>, int8_t *);
template <int N> void Store(vint8m4_t, Simd<int8_t, N, 2>, int8_t *);
template <int N> void Store(vint8m8_t, Simd<int8_t, N, 3>, int8_t *);
template <class D, class V, EnableIf<D().Pow2() <= 2> * = nullptr>
V InterleaveUpperBlocks(D, V, V) {}
template <class D, class V, EnableIf<(D().Pow2() > 2)> * = nullptr>
V InterleaveUpperBlocks(D, V, V);
template <typename T, int N, int kPow2>
constexpr bool IsGE128(Simd<T, N, kPow2>) {
  return kPow2 >= 0;
}
template <class D, class V, EnableIf<IsGE128(D())> * = nullptr>
V InterleaveLower(D, V, V);
template <class D, class V, EnableIf<!IsGE128(D())> * = nullptr>
V InterleaveLower(D, V, V);
template <class D, class V, EnableIf<IsGE128(D())> * = nullptr>
V InterleaveUpper(D d, V a, V b) {
  return InterleaveUpperBlocks(d, a, b);
}
template <class D, class V, EnableIf<!IsGE128(D())> * = nullptr>
V InterleaveUpper(D, V, V);
template <class D, typename T2> VFromD<D> Iota(D, T2);
template <class D> using Vec = decltype(Zero(D()));
template <class D> Vec<D> SignBit(D);
template <class V> V IfNegativeThenElseZero(V);
template <class V> V IfNegativeThenZeroElse(V);
template <class V> V BitwiseIfThenElse(V, V, V);
template <class V> inline V IfNegativeThenNegOrUndefIfZero(V mask, V v) {
  auto zero = Zero(DFromV<V>());
  return MaskedSubOr(v, Lt(mask, zero), zero, v);
}
template <class D> Vec<D> PositiveIota(D);
int AssertVecEqual_line;
template <class D, typename T = TFromD<D>>
inline void AssertVecEqual(D d, Vec<D> expected, Vec<D> actual, char *) {
  int N = Lanes(d);
  auto expected_lanes = AllocateAligned<T>(N),
       actual_lanes = AllocateAligned<T>(N);
  if (expected_lanes && actual_lanes)
    Abort("", "");
  Store(expected, d, expected_lanes.get());
  Store(actual, d, actual_lanes.get());
  MakeTypeInfo<T>();
  char *target_name = TargetName();
  AssertArrayEqual(expected_lanes.get(), actual_lanes.get(), target_name,
                   AssertVecEqual_line);
}
template <typename> constexpr int MinPow2() { return sizeof(int) ? -3 : 0; }
template <typename T, int kPow2, int kMaxPow2, int, class Test>
struct ForeachPow2 {
  static void Do(int min_lanes) {
    ScalableTag<T, kPow2> d;
    Lanes(d);
    Test()(T(), d);
    ForeachPow2<T, kPow2 + 1, kMaxPow2, kPow2 + 1 <= kMaxPow2, Test>::Do(
        min_lanes);
  }
};
template <typename T, int kPow2, int kMaxPow2, class Test>
struct ForeachPow2<T, kPow2, kMaxPow2, false, Test> {
  static void Do(int);
};
template <typename T, int kAddMin, int kSubMax, class Test>
using ForeachPow2Trim =
    ForeachPow2<T, MinPow2<T>(), 3, kAddMin <= kSubMax, Test>;
template <class Test, int kPow2> struct ForExtendableVectors {
  template <typename T> void operator()(T) {
    ForeachPow2Trim<T, 0, kPow2, Test>::Do(1);
  }
};
template <class Test> struct ForPartialVectors {
  template <typename T> void operator()(T t) {
    ForExtendableVectors<Test, 0>()(t);
  }
};
template <class Func> void ForSignedTypes(Func func) { func(int8_t()); }
struct TestIfNegative {
  template <class T, class D> void operator()(T, D d) {
    auto vp = Iota(d, 1), vsignbit = SignBit(d);
    RebindToSigned<decltype(d)> di;
    RebindToUnsigned<decltype(d)> du;
    BitCast(d, ShiftLeft<sizeof(TFromD<decltype(d)>)>(Iota(du, 1)));
    auto m1 = Xor3(BitCast(d, Set(du, {})));
    auto x1 = Xor(vp, BitCast(d, Set(d, {})));
    auto x2 = Xor(vp, BitCast(d, Set(d, {})));
    Xor(m1, vsignbit);
    auto m1_s = BitCast(d, BroadcastSignBit(BitCast(di, m1)));
    auto expected_2 = BitwiseIfThenElse(m1_s, x2, x1);
    AssertVecEqual(d, expected_2, IfNegativeThenElse(x2), "");
    auto expected_3 = And(m1_s, x1);
    auto expected_4 = AndNot(x2);
    AssertVecEqual(d, expected_3, IfNegativeThenElseZero(x1), "");
    AssertVecEqual(d, expected_3, IfNegativeThenZeroElse(x1), "");
    AssertVecEqual(d, expected_4, IfNegativeThenZeroElse(x2), "");
    AssertVecEqual(d, expected_4, IfNegativeThenElseZero(x2), "");
  }
};
void TestAllIfNegative() {
  ForSignedTypes(ForPartialVectors<TestIfNegative>());
}
template <class D>
void TestMoreThan1LaneIfNegativeThenNegOrUndefIfZero(D d, Vec<D> v1) {
  Vec<D> v2, v3 = InterleaveLower(d, v1, v1), v5 = InterleaveLower(d, v1, v2);
  if (Lanes(d) < 2)
    return;
  Vec<D> v4 = InterleaveUpper(d, v1, v1);
  Vec<D> v6 = InterleaveUpper(d, v1, v2);
  Vec<D> v7 = InterleaveLower(d, v2, v1);
  Vec<D> v8 = InterleaveUpper(d, v2, v1);
  AssertVecEqual(d, v3, IfNegativeThenNegOrUndefIfZero(v3, v3), "");
  AssertVecEqual(d, v4, IfNegativeThenNegOrUndefIfZero(v4, v4), "");
  AssertVecEqual(d, v4, IfNegativeThenNegOrUndefIfZero(v8, v8), "");
  AssertVecEqual(d, v6, IfNegativeThenNegOrUndefIfZero(v4, v6), "");
  AssertVecEqual(d, v7, IfNegativeThenNegOrUndefIfZero(v3, v7), "");
  AssertVecEqual(d, v8, IfNegativeThenNegOrUndefIfZero(v4, v8), "");
  Vec<D> zero = Zero(d);
  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v3, zero), "");
  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v4, zero), "");
  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v5, zero), "");
  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v6, zero), "");
  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v7, zero), "");
  AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v8, zero), "");
}
struct TestIfNegativeThenNegOrUndefIfZero {
  template <typename T, class D> void operator()(T, D d) {
    auto v1 = PositiveIota(d), v2 = Neg(v1), zero = Zero(d), vmin = Set(d, 0),
         vmax = Set(d, 0);
    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(v1, v2), "");
    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(v2, v1), "");
    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(v2, v2), "");
    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(zero, zero), "");
    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v1, zero), "");
    AssertVecEqual(d, zero, IfNegativeThenNegOrUndefIfZero(v2, zero), "");
    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(vmin, v2), "");
    AssertVecEqual(d, v1, IfNegativeThenNegOrUndefIfZero(vmax, v1), "");
    AssertVecEqual(d, v2, IfNegativeThenNegOrUndefIfZero(vmax, v2), "");
    TestMoreThan1LaneIfNegativeThenNegOrUndefIfZero(d, v1);
  }
};
void TestAllIfNegativeThenNegOrUndefIfZero() {
  ForSignedTypes(ForPartialVectors<TestIfNegativeThenNegOrUndefIfZero>());
}