aarch64: Fix invalid subregs for BE svread/write_za
Multi-register svread_za and svwrite_za are implemented using one pattern per register count, with the register contents being bitcast on entry (for writes) or return (for reads). Previously we relied on subregs for this, with the subreg for reads being handled by target-independent code. But using subregs isn't correct for many big-endian cases, where following subreg rules often requires actual instructions. The semantics are instead supposed to be those of svreinterpret. gcc/ PR target/112931 PR target/112933 * config/aarch64/aarch64-protos.h (aarch64_sve_reinterpret): Declare. * config/aarch64/aarch64.cc (aarch64_sve_reinterpret): New function. * config/aarch64/aarch64-sve-builtins-sme.cc (svread_za_impl::expand) (svwrite_za_impl::expand): Use it to cast the SVE register to the right mode.
This commit is contained in:
parent
f5c8d6bc05
commit
b3aed45963
3 changed files with 26 additions and 2 deletions
|
@ -789,6 +789,7 @@ bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
|
|||
bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT);
|
||||
rtx aarch64_sve_reinterpret (machine_mode, rtx);
|
||||
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
|
||||
bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
|
||||
machine_mode aarch64_sve_int_mode (machine_mode);
|
||||
|
|
|
@ -365,7 +365,8 @@ public:
|
|||
expand (function_expander &e) const override
|
||||
{
|
||||
machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
|
||||
return e.use_exact_insn (code_for_aarch64_sme_read (mode));
|
||||
rtx res = e.use_exact_insn (code_for_aarch64_sme_read (mode));
|
||||
return aarch64_sve_reinterpret (e.result_mode (), res);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -457,7 +458,7 @@ public:
|
|||
expand (function_expander &e) const override
|
||||
{
|
||||
machine_mode mode = e.vectors_per_tuple () == 4 ? VNx8DImode : VNx4DImode;
|
||||
e.args[1] = lowpart_subreg (mode, e.args[1], e.tuple_mode (1));
|
||||
e.args[1] = aarch64_sve_reinterpret (mode, e.args[1]);
|
||||
return e.use_exact_insn (code_for_aarch64_sme_write (mode));
|
||||
}
|
||||
};
|
||||
|
|
|
@ -3226,6 +3226,28 @@ aarch64_split_simd_move (rtx dst, rtx src)
|
|||
}
|
||||
}
|
||||
|
||||
/* Return a register that contains SVE value X reinterpreted as SVE mode MODE.
|
||||
The semantics are those of svreinterpret rather than those of subregs;
|
||||
see the comment at the head of aarch64-sve.md for details about the
|
||||
difference. */
|
||||
|
||||
rtx
|
||||
aarch64_sve_reinterpret (machine_mode mode, rtx x)
|
||||
{
|
||||
/* X already has the requested mode, so it can be returned unchanged. */
if (GET_MODE (x) == mode)
|
||||
return x;
|
||||
|
||||
/* can_change_mode_class must only return true if subregs and svreinterprets
|
||||
have the same semantics. */
|
||||
if (targetm.can_change_mode_class (GET_MODE (x), mode, FP_REGS))
|
||||
return lowpart_subreg (mode, x, GET_MODE (x));
|
||||
|
||||
/* A subreg is not known to be equivalent here, so emit an explicit reinterpret instruction into a new MODE pseudo. */
rtx res = gen_reg_rtx (mode);
|
||||
x = force_reg (GET_MODE (x), x);
|
||||
emit_insn (gen_aarch64_sve_reinterpret (mode, res, x));
|
||||
return res;
|
||||
}
|
||||
|
||||
bool
|
||||
aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
|
||||
machine_mode ymode, rtx y)
|
||||
|
|
Loading…
Add table
Reference in a new issue