So another constant synthesis improvement.

In this patch we're looking at cases where we'd like to be able to use
lui+slli, but can't because of the sign extending nature of lui on
TARGET_64BIT.  For example: 0x8001100000020UL.  The trunk currently generates 4
instructions for that constant, when it can be done with 3 (lui+slli.uw+addi).

When Zba is enabled, we can use lui+slli.uw as the slli.uw masks off the bits
32..63 before shifting, giving us the precise semantics we want.

I strongly suspect we'll want to do the same for a set of constants with
lui+add.uw, lui+shNadd.uw, so you'll see the beginnings of generalizing support
for lui followed by a "uw" instruction.

The new test just tests the set of cases that showed up while exploring a
particular space of the constant synthesis problem.  It's not meant to be
exhaustive; known gaps remain (e.g. failure to use shadd when profitable).

gcc/

	* config/riscv/riscv.cc (riscv_integer_op): Add field tracking if we
	want to use a "uw" instruction variant.
	(riscv_build_integer_1): Initialize the new field in various places.
	Use lui+slli.uw for some constants.
	(riscv_move_integer): Handle slli.uw.

gcc/testsuite/

	* gcc.target/riscv/synthesis-2.c: New test.
This commit is contained in:
Jeff Law 2024-05-06 15:27:43 -06:00
parent 53026cbf08
commit 975bb17e2f
2 changed files with 1539 additions and 2 deletions

View file

@ -249,6 +249,7 @@ struct riscv_arg_info {
where A is an accumulator, each CODE[i] is a binary rtl operation
and each VALUE[i] is a constant integer. CODE[0] is undefined. */
struct riscv_integer_op {
/* True if this step should be emitted using the "uw" variant of the
   instruction (currently only slli.uw), which masks off bits 32..63 of
   the source value before operating.  Only set when TARGET_64BIT and
   TARGET_ZBA are both enabled.  */
bool use_uw;
/* The rtl binary operation to apply at this step; UNKNOWN for the
   initial load-immediate step.  */
enum rtx_code code;
/* The constant operand for CODE (or the initial immediate value).  */
unsigned HOST_WIDE_INT value;
};
@ -734,6 +735,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
/* Simply ADDI or LUI. */
codes[0].code = UNKNOWN;
codes[0].value = value;
codes[0].use_uw = false;
return 1;
}
if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
@ -741,6 +743,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
/* Simply BSETI. */
codes[0].code = UNKNOWN;
codes[0].value = value;
codes[0].use_uw = false;
/* RISC-V sign-extends all 32bit values that live in a 32bit
register. To avoid paradoxes, we thus need to use the
@ -769,6 +772,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
{
alt_codes[alt_cost-1].code = PLUS;
alt_codes[alt_cost-1].value = low_part;
alt_codes[alt_cost-1].use_uw = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@ -782,6 +786,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
{
alt_codes[alt_cost-1].code = XOR;
alt_codes[alt_cost-1].value = low_part;
alt_codes[alt_cost-1].use_uw = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@ -792,17 +797,37 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
{
int shift = ctz_hwi (value);
unsigned HOST_WIDE_INT x = value;
bool use_uw = false;
x = sext_hwi (x >> shift, HOST_BITS_PER_WIDE_INT - shift);
/* Don't eliminate the lower 12 bits if LUI might apply. */
if (shift > IMM_BITS && !SMALL_OPERAND (x) && LUI_OPERAND (x << IMM_BITS))
if (shift > IMM_BITS
&& !SMALL_OPERAND (x)
&& (LUI_OPERAND (x << IMM_BITS)
|| (TARGET_64BIT
&& TARGET_ZBA
&& LUI_OPERAND ((x << IMM_BITS)
& ~HOST_WIDE_INT_C (0x80000000)))))
shift -= IMM_BITS, x <<= IMM_BITS;
/* Adjust X if it isn't a LUI operand in isolation, but we can use
a subsequent "uw" instruction form to mask off the undesirable
bits. */
if (!LUI_OPERAND (x)
&& TARGET_64BIT
&& TARGET_ZBA
&& LUI_OPERAND (x & ~HOST_WIDE_INT_C (0x80000000UL)))
{
x = sext_hwi (x, 32);
use_uw = true;
}
alt_cost = 1 + riscv_build_integer_1 (alt_codes, x, mode);
if (alt_cost < cost)
{
alt_codes[alt_cost-1].code = ASHIFT;
alt_codes[alt_cost-1].value = shift;
alt_codes[alt_cost-1].use_uw = use_uw;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@ -823,8 +848,10 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
/* The sign-bit might be zero, so just rotate to be safe. */
codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
| (value << (64 - trailing_ones)));
codes[0].use_uw = false;
codes[1].code = ROTATERT;
codes[1].value = 64 - trailing_ones;
codes[1].use_uw = false;
cost = 2;
}
/* Handle the case where the 11 bit range of zero bits wraps around. */
@ -836,8 +863,10 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
codes[0].value = ((value << (32 - upper_trailing_ones))
| ((unsigned HOST_WIDE_INT) value
>> (32 + upper_trailing_ones)));
codes[0].use_uw = false;
codes[1].code = ROTATERT;
codes[1].value = 32 - upper_trailing_ones;
codes[1].use_uw = false;
cost = 2;
}
/* Final cases, particularly focused on bseti. */
@ -851,6 +880,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
{
alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
alt_codes[i].value = value & 0x7ffff800;
alt_codes[i].use_uw = false;
value &= ~0x7ffff800;
i++;
}
@ -860,6 +890,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
{
alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
alt_codes[i].value = value & 0x7ff;
alt_codes[i].use_uw = false;
value &= ~0x7ff;
i++;
}
@ -870,6 +901,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
HOST_WIDE_INT bit = ctz_hwi (value);
alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
alt_codes[i].value = 1UL << bit;
alt_codes[i].use_uw = false;
value &= ~(1ULL << bit);
i++;
}
@ -911,6 +943,7 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
{
alt_codes[alt_cost-1].code = LSHIFTRT;
alt_codes[alt_cost-1].value = shift;
alt_codes[alt_cost-1].use_uw = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@ -922,6 +955,7 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
{
alt_codes[alt_cost-1].code = LSHIFTRT;
alt_codes[alt_cost-1].value = shift;
alt_codes[alt_cost-1].use_uw = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@ -2478,7 +2512,29 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
else
x = force_reg (mode, x);
codes[i].value = trunc_int_for_mode (codes[i].value, mode);
x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
/* If the sequence requires using a "uw" form of an insn, we're
going to have to construct the RTL ourselves and put it in
a register to avoid force_reg/force_operand from mucking things
up. */
if (codes[i].use_uw)
{
gcc_assert (TARGET_64BIT || TARGET_ZBA);
rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
/* Create the proper mask for the slli.uw instruction. */
unsigned HOST_WIDE_INT value = 0xffffffff;
value <<= codes[i].value;
/* Right now the only "uw" form we use is slli, we may add more
in the future. */
x = gen_rtx_fmt_ee (codes[i].code, mode,
x, GEN_INT (codes[i].value));
x = gen_rtx_fmt_ee (AND, mode, x, GEN_INT (value));
x = riscv_emit_set (t, x);
}
else
x = gen_rtx_fmt_ee (codes[i].code, mode,
x, GEN_INT (codes[i].value));
}
}

File diff suppressed because it is too large Load diff