LoongArch: Optimize for conditional move operations

The optimization example is as follows.

From:
  if (condition)
    dest += 1 << 16;
To:
  dest += (condition ? 1 : 0) << 16;

The optimized sequence does not use maskeqz and masknez, thus reducing
the number of instructions.

gcc/ChangeLog:

	* config/loongarch/loongarch.cc
	(loongarch_expand_conditional_move): Add some optimization
	implementations based on noce_try_cmove_arith.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/conditional-move-opt-1.c: New test.
	* gcc.target/loongarch/conditional-move-opt-2.c: New test.
This commit is contained in:
Guo Jie 2024-12-30 10:39:13 +08:00 committed by Lulu Cheng
parent ea7476516d
commit d55d40afd4
3 changed files with 202 additions and 1 deletion

View file

@ -5294,6 +5294,81 @@ loongarch_expand_conditional_move (rtx *operands)
loongarch_emit_float_compare (&code, &op0, &op1);
else
{
/* Optimize to reduce the number of instructions for ternary operations.
Mainly implemented based on noce_try_cmove_arith.
For dest = (condition) ? value_if_true : value_if_false;
the optimization requires:
a. value_if_false = var;
b. value_if_true = var OP C (a positive integer power of 2).
Situations similar to the following:
if (condition)
dest += 1 << imm;
to:
dest += (condition ? 1 : 0) << imm; */
rtx_insn *insn;
HOST_WIDE_INT val = 0; /* The value of rtx C. */
/* INSN with operands[2] as the output. */
rtx_insn *value_if_true_insn = NULL;
/* INSN with operands[3] as the output. */
rtx_insn *value_if_false_insn = NULL;
rtx value_if_true_insn_src = NULL_RTX;
/* Common operand var in value_if_true and value_if_false. */
rtx comm_var = NULL_RTX;
bool can_be_optimized = false;
/* Search value_if_true_insn and value_if_false_insn. */
struct sequence_stack *seq = get_current_sequence ()->next;
for (insn = seq->last; insn; insn = PREV_INSN (insn))
{
if (single_set (insn))
{
rtx set_dest = SET_DEST (single_set (insn));
if (rtx_equal_p (set_dest, operands[2]))
value_if_true_insn = insn;
else if (rtx_equal_p (set_dest, operands[3]))
value_if_false_insn = insn;
if (value_if_true_insn && value_if_false_insn)
break;
}
}
/* Check if the optimization conditions are met. */
if (value_if_true_insn
&& value_if_false_insn
/* Make sure that value_if_false and var are the same. */
&& BINARY_P (value_if_true_insn_src
= SET_SRC (single_set (value_if_true_insn)))
/* Make sure that both value_if_true and value_if_false
has the same var. */
&& rtx_equal_p (XEXP (value_if_true_insn_src, 0),
SET_SRC (single_set (value_if_false_insn))))
{
comm_var = SET_SRC (single_set (value_if_false_insn));
rtx src = XEXP (value_if_true_insn_src, 1);
rtx imm = NULL_RTX;
if (CONST_INT_P (src))
imm = src;
else
for (insn = seq->last; insn; insn = PREV_INSN (insn))
{
rtx set = single_set (insn);
if (set && rtx_equal_p (SET_DEST (set), src))
{
imm = SET_SRC (set);
break;
}
}
if (imm && CONST_INT_P (imm))
{
val = INTVAL (imm);
/* Make sure that imm is a positive integer power of 2. */
if (val > 0 && !(val & (val - 1)))
can_be_optimized = true;
}
}
if (GET_MODE_SIZE (GET_MODE (op0)) < UNITS_PER_WORD)
{
promote_op[0] = (REG_P (op0) && REG_P (operands[2]) &&
@ -5314,22 +5389,48 @@ loongarch_expand_conditional_move (rtx *operands)
op0_extend = op0;
op1_extend = force_reg (word_mode, op1);
rtx target = gen_reg_rtx (GET_MODE (op0));
if (code == EQ || code == NE)
{
op0 = loongarch_zero_if_equal (op0, op1);
op1 = const0_rtx;
/* For EQ, set target to 1 if op0 and op1 are the same,
otherwise set to 0.
For NE, set target to 0 if op0 and op1 are the same,
otherwise set to 1. */
if (can_be_optimized)
loongarch_emit_binary (code, target, op0, const0_rtx);
}
else
{
/* The comparison needs a separate scc instruction. Store the
result of the scc in *OP0 and compare it against zero. */
bool invert = false;
rtx target = gen_reg_rtx (GET_MODE (op0));
loongarch_emit_int_order_test (code, &invert, target, op0, op1);
if (can_be_optimized && invert)
loongarch_emit_binary (EQ, target, target, const0_rtx);
code = invert ? EQ : NE;
op0 = target;
op1 = const0_rtx;
}
if (can_be_optimized)
{
/* Perform (condition ? 1 : 0) << log2 (C). */
loongarch_emit_binary (ASHIFT, target, target,
GEN_INT (exact_log2 (val)));
/* Shift-related insn patterns only support SImode operands[2]. */
enum rtx_code opcode = GET_CODE (value_if_true_insn_src);
if (opcode == ASHIFT || opcode == ASHIFTRT || opcode == LSHIFTRT
|| opcode == ROTATE || opcode == ROTATERT)
target = gen_lowpart (SImode, target);
/* Perform target = target OP ((condition ? 1 : 0) << log2 (C)). */
loongarch_emit_binary (opcode, operands[0],
force_reg (GET_MODE (operands[3]), comm_var),
target);
return;
}
}
rtx cond = gen_rtx_fmt_ee (code, GET_MODE (op0), op0, op1);

View file

@ -0,0 +1,58 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-final { scan-assembler-not "maskeqz" } } */
/* { dg-final { scan-assembler-not "masknez" } } */
extern long lm, ln, lr;
void
test_ne ()
{
if (lm != ln)
lr += (1 << 16);
lr += lm;
}
void
test_eq ()
{
if (lm == ln)
lr = lm + (1 << 16);
else
lr = lm;
lr += lm;
}
void
test_lt ()
{
if (lm < ln)
lr *= (1 << 16);
lr += lm;
}
void
test_le ()
{
if (lm <= ln)
lr = lm * ((long)1 << 32);
else
lr = lm;
lr += lm;
}
void
test_nez ()
{
if (lm != 0)
lr <<= (1 << 4);
lr += lm;
}
void
test_eqz ()
{
if (lm == 0)
lr >>= (1 << 2);
lr += lm;
}

View file

@ -0,0 +1,42 @@
/* { dg-do compile } */
/* { dg-options "-O2 --param max-rtl-if-conversion-insns=1" } */
/* { dg-final { scan-assembler-not "maskeqz" } } */
/* { dg-final { scan-assembler-not "masknez" } } */
/* The relevant optimization is currently only based on noce_try_cmove_arith,
so it bypasses noce_convert_multiple_sets by
--param max-rtl-if-conversion-insns=1 to execute noce_try_cmove_arith. */
extern long lm, ln, lr;
void
test_ge ()
{
if (lm >= ln)
lr += ((long)1 << 32);
lr += lm;
}
void
test_ltz ()
{
if (lm < 0)
lr |= (1 << 16);
lr += lm;
}
void
test_lez ()
{
if (lm <= 0)
lr &= (1 << 16);
lr += lm;
}
void
test_gez ()
{
if (lm >= 0)
lr ^= (1 << 16);
lr += lm;
}