RISC-V: Describe correct USEs for gpr_save pattern [PR95252]

- Verified on rv32emc/rv32gc/rv64gc bare-metal target and rv32gc/rv64gc
   linux target with qemu.

gcc/ChangeLog:

	* config/riscv/predicates.md (gpr_save_operation): New.
	* config/riscv/riscv-protos.h (riscv_gen_gpr_save_insn): New.
	(riscv_gpr_save_operation_p): Ditto.
	* config/riscv/riscv-sr.c (riscv_remove_unneeded_save_restore_calls):
	Ignore USEs for gpr_save patter.
	* config/riscv/riscv.c (gpr_save_reg_order): New.
	(riscv_expand_prologue): Use riscv_gen_gpr_save_insn to gen gpr_save.
	(riscv_gen_gpr_save_insn): New.
	(riscv_gpr_save_operation_p): Ditto.
	* config/riscv/riscv.md (S3_REGNUM): New.
	(S4_REGNUM): Ditto.
	(S5_REGNUM): Ditto.
	(S6_REGNUM): Ditto.
	(S7_REGNUM): Ditto.
	(S8_REGNUM): Ditto.
	(S9_REGNUM): Ditto.
	(S10_REGNUM): Ditto.
	(S11_REGNUM): Ditto.
	(gpr_save): Model USEs correctly.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/pr95252.c: New.
This commit is contained in:
Kito Cheng 2020-06-10 19:40:59 -07:00
parent ec6ffbb919
commit d0e0c1300f
6 changed files with 156 additions and 5 deletions

View file

@ -206,3 +206,9 @@
(define_predicate "fp_branch_comparison"
(match_code "unordered,ordered,unlt,unge,unle,ungt,uneq,ltgt,ne,eq,lt,le,gt,ge"))
(define_special_predicate "gpr_save_operation"
(match_code "parallel")
{
return riscv_gpr_save_operation_p (op);
})

View file

@ -73,6 +73,8 @@ extern bool riscv_can_use_return_insn (void);
extern rtx riscv_function_value (const_tree, const_tree, enum machine_mode);
extern bool riscv_expand_block_move (rtx, rtx, rtx);
extern bool riscv_store_data_bypass_p (rtx_insn *, rtx_insn *);
extern rtx riscv_gen_gpr_save_insn (struct riscv_frame_info *);
extern bool riscv_gpr_save_operation_p (rtx);
/* Routines implemented in riscv-c.c. */
void riscv_cpu_cpp_builtins (cpp_reader *);

View file

@ -306,6 +306,10 @@ riscv_remove_unneeded_save_restore_calls (void)
if (CALL_P (insn))
++call_count;
/* Ignore any USEs in the gpr_save pattern. They don't prevent us
from optimizing away the save call. */
else if (insn == prologue_matched)
;
else
{
df_ref use;

View file

@ -334,6 +334,14 @@ static const struct attribute_spec riscv_attribute_table[] =
{ NULL, 0, 0, false, false, false, false, NULL, NULL }
};
/* Order for the CLOBBERs/USEs of gpr_save. */
static const unsigned gpr_save_reg_order[] = {
INVALID_REGNUM, T0_REGNUM, T1_REGNUM, RETURN_ADDR_REGNUM,
S0_REGNUM, S1_REGNUM, S2_REGNUM, S3_REGNUM, S4_REGNUM,
S5_REGNUM, S6_REGNUM, S7_REGNUM, S8_REGNUM, S9_REGNUM,
S10_REGNUM, S11_REGNUM
};
/* A table describing all the processors GCC knows about. */
static const struct riscv_cpu_info riscv_cpu_info_table[] = {
{ "rocket", generic, &rocket_tune_info },
@ -4069,9 +4077,9 @@ riscv_expand_prologue (void)
rtx dwarf = NULL_RTX;
dwarf = riscv_adjust_libcall_cfi_prologue ();
frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
size -= frame->save_libcall_adjustment;
insn = emit_insn (gen_gpr_save (GEN_INT (mask)));
insn = emit_insn (riscv_gen_gpr_save_insn (frame));
frame->mask = 0; /* Temporarily fib that we need not save GPRs. */
RTX_FRAME_RELATED_P (insn) = 1;
REG_NOTES (insn) = dwarf;
@ -5177,6 +5185,77 @@ riscv_new_address_profitable_p (rtx memref, rtx_insn *insn, rtx new_addr)
return new_cost <= old_cost;
}
/* Helper function for generating gpr_save pattern. */
rtx
riscv_gen_gpr_save_insn (struct riscv_frame_info *frame)
{
unsigned count = riscv_save_libcall_count (frame->mask);
/* 1 for unspec 2 for clobber t0/t1 and 1 for ra. */
unsigned veclen = 1 + 2 + 1 + count;
rtvec vec = rtvec_alloc (veclen);
gcc_assert (veclen <= ARRAY_SIZE (gpr_save_reg_order));
RTVEC_ELT (vec, 0) =
gen_rtx_UNSPEC_VOLATILE (VOIDmode,
gen_rtvec (1, GEN_INT (frame->mask)), UNSPECV_GPR_SAVE);
for (int i = 1; i < veclen; ++i)
{
unsigned regno = gpr_save_reg_order[i];
rtx reg = gen_rtx_REG (Pmode, regno);
rtx elt;
/* t0 and t1 are CLOBBERs, others are USEs. */
if (i < 3)
elt = gen_rtx_CLOBBER (Pmode, reg);
else
elt = gen_rtx_USE (Pmode, reg);
RTVEC_ELT (vec, i) = elt;
}
/* Largest number of caller-save register must set in mask if we are
not using __riscv_save_0. */
gcc_assert ((count == 0) ||
BITSET_P (frame->mask, gpr_save_reg_order[veclen - 1]));
return gen_rtx_PARALLEL (VOIDmode, vec);
}
/* Return true if it's valid gpr_save pattern. */
bool
riscv_gpr_save_operation_p (rtx op)
{
HOST_WIDE_INT len = XVECLEN (op, 0);
gcc_assert (len <= ARRAY_SIZE (gpr_save_reg_order));
for (int i = 0; i < len; i++)
{
rtx elt = XVECEXP (op, 0, i);
if (i == 0)
{
/* First element in parallel is unspec. */
if (GET_CODE (elt) != UNSPEC_VOLATILE
|| GET_CODE (XVECEXP (elt, 0, 0)) != CONST_INT
|| XINT (elt, 1) != UNSPECV_GPR_SAVE)
return false;
}
else
{
/* Two CLOBBER and USEs, must check the order. */
unsigned expect_code = i < 3 ? CLOBBER : USE;
if (GET_CODE (elt) != expect_code
|| !REG_P (XEXP (elt, 1))
|| (REGNO (XEXP (elt, 1)) != gpr_save_reg_order[i]))
return false;
}
break;
}
return true;
}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

View file

@ -75,6 +75,15 @@
(S0_REGNUM 8)
(S1_REGNUM 9)
(S2_REGNUM 18)
(S3_REGNUM 19)
(S4_REGNUM 20)
(S5_REGNUM 21)
(S6_REGNUM 22)
(S7_REGNUM 23)
(S8_REGNUM 24)
(S9_REGNUM 25)
(S10_REGNUM 26)
(S11_REGNUM 27)
(NORMAL_RETURN 0)
(SIBCALL_RETURN 1)
@ -2427,10 +2436,14 @@
""
"ebreak")
;; Must use the registers that we save to prevent the rename reg optimization
;; pass from using them before the gpr_save pattern when shrink wrapping
;; occurs. See bug 95252 for instance.
(define_insn "gpr_save"
[(unspec_volatile [(match_operand 0 "const_int_operand")] UNSPECV_GPR_SAVE)
(clobber (reg:SI T0_REGNUM))
(clobber (reg:SI T1_REGNUM))]
[(match_parallel 1 "gpr_save_operation"
[(unspec_volatile [(match_operand 0 "const_int_operand")]
UNSPECV_GPR_SAVE)])]
""
{ return riscv_output_gpr_save (INTVAL (operands[0])); })

View file

@ -0,0 +1,47 @@
/* PR target/95252 */
/* { dg-options "-O3 -funroll-loops -msave-restore" } */
/* { dg-do run } */
int a[6], b = 1, d, e;
long long c;
static int f = 1;
void
fn1 (int p1)
{
b = (b >> 1) & (1 ^ a[(1 ^ p1) & 5]);
}
void
fn2 ()
{
b = (b >> 1) & (1 ^ a[(b ^ 1) & 1]);
fn1 (c >> 1 & 5);
fn1 (c >> 2 & 5);
fn1 (c >> 4 & 5);
fn1 (c >> 8 & 5);
}
int
main ()
{
int i, j;
for (; d;)
{
for (; e;)
fn2 ();
f = 0;
}
for (i = 0; i < 8; i++)
{
if (f)
i = 9;
for (j = 0; j < 7; j++)
fn2 ();
}
if (b != 0)
__builtin_abort ();
return 0;
}