RISC-V: Optimize branches with shifted immediate operands

Following the valuable feedback I received, it is clear to me that the
oversight was in the tests, which did not show the benefits of the patch.
I have added functions f5 and f6 to the test file; with the patch applied,
they generate more efficient code with fewer instructions.

Before the patch:

f5:
        li      a4,2097152
        addi    a4,a4,-2048
        li      a5,1167360
        and     a0,a0,a4
        addi    a5,a5,-2048
        beq     a0,a5,.L4

f6:
        li      a5,3407872
        addi    a5,a5,-2048
        and     a0,a0,a5
        li      a5,1114112
        beq     a0,a5,.L7

After the patch:

f5:
        srli    a5,a0,11
        andi    a5,a5,1023
        li      a4,569
        beq     a5,a4,.L5

f6:
        srli    a5,a0,11
        andi    a5,a5,1663
        li      a4,544
        beq     a5,a4,.L9

	PR target/115921

gcc/ChangeLog:

	* config/riscv/iterators.md (any_eq): New code iterator.
	* config/riscv/riscv.h (COMMON_TRAILING_ZEROS): New macro.
	(SMALL_AFTER_COMMON_TRAILING_SHIFT): Ditto.
	* config/riscv/riscv.md (*branch<ANYI:mode>_shiftedarith_<optab>_shifted):
	New pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/branch-1.c: Additional tests.
This commit is contained in:
Jovan Vukic 2024-10-09 16:53:38 -06:00 committed by Jeff Law
parent df3bda457b
commit c8957c8779
4 changed files with 63 additions and 3 deletions

View file

@ -233,6 +233,8 @@
(define_code_iterator any_ge [ge geu])
(define_code_iterator any_lt [lt ltu])
(define_code_iterator any_le [le leu])
;; Iterates over the two equality comparison codes, eq and ne.
(define_code_iterator any_eq [eq ne])
;; Iterators for conditions we can emit a sCC against 0 or a reg directly
(define_code_iterator scc_0 [eq ne gt gtu])
@ -285,6 +287,8 @@
(le "le")
(gt "gt")
(lt "lt")
(eq "eq")
(ne "ne")
(ior "ior")
(xor "xor")
(and "and")

View file

@ -667,6 +667,18 @@ enum reg_class
/* Nonzero iff bit number BIT of VALUE is 1, where bit 0 is the least
   significant bit.  The probe mask is built as an unsigned long long.  */
#define BITSET_P(VALUE, BIT) \
  ((((unsigned long long) 1 << (BIT)) & (VALUE)) != 0)
/* Yield the number of trailing zero bits that VAL1 and VAL2 have in common,
   i.e. the lesser of ctz_hwi (VAL1) and ctz_hwi (VAL2).  Each argument may
   be evaluated more than once.  */
#define COMMON_TRAILING_ZEROS(VAL1, VAL2) \
  (ctz_hwi (VAL2) < ctz_hwi (VAL1) ? ctz_hwi (VAL2) : ctz_hwi (VAL1))
/* Nonzero when VAL1 and VAL2 each satisfy SMALL_OPERAND once shifted right
   by COMMON_TRAILING_ZEROS (VAL1, VAL2).  Each argument may be evaluated
   more than once.  */
#define SMALL_AFTER_COMMON_TRAILING_SHIFT(VAL1, VAL2) \
  (SMALL_OPERAND ((VAL2) >> COMMON_TRAILING_ZEROS (VAL1, VAL2)) \
   && SMALL_OPERAND ((VAL1) >> COMMON_TRAILING_ZEROS (VAL1, VAL2)))
/* Stack layout; function entry, exit and calling. */
#define STACK_GROWS_DOWNWARD 1

View file

@ -3129,6 +3129,38 @@
}
[(set_attr "type" "branch")])
;; Branch on (x & C1) ==/!= C2, where C1 (operand 2) and C2 (operand 3) are
;; each not a SMALL_OPERAND, but both become SMALL_OPERANDs once shifted
;; right by their common number of trailing zeros.  After reload this is
;; split into: shift x right by that count into scratch 4, AND scratch 4
;; with the shifted C1, load the shifted C2 into scratch 5, and finally
;; branch on scratch 4 ==/!= scratch 5.
;; NOTE(review): operand 1 has mode ANYI but appears directly under
;; lshiftrt:X in the split -- confirm this is valid when ANYI differs from X.
(define_insn_and_split "*branch<ANYI:mode>_shiftedarith_<optab>_shifted"
[(set (pc)
(if_then_else (any_eq
(and:ANYI (match_operand:ANYI 1 "register_operand" "r")
(match_operand 2 "shifted_const_arith_operand" "i"))
(match_operand 3 "shifted_const_arith_operand" "i"))
(label_ref (match_operand 0 "" ""))
(pc)))
(clobber (match_scratch:X 4 "=&r"))
(clobber (match_scratch:X 5 "=&r"))]
"!SMALL_OPERAND (INTVAL (operands[2]))
&& !SMALL_OPERAND (INTVAL (operands[3]))
&& SMALL_AFTER_COMMON_TRAILING_SHIFT (INTVAL (operands[2]),
INTVAL (operands[3]))"
"#"
"&& reload_completed"
[(set (match_dup 4) (lshiftrt:X (match_dup 1) (match_dup 7)))
(set (match_dup 4) (and:X (match_dup 4) (match_dup 8)))
(set (match_dup 5) (match_dup 9))
(set (pc) (if_then_else (any_eq (match_dup 4) (match_dup 5))
(label_ref (match_dup 0)) (pc)))]
{
/* mask1 is the AND mask; mask2 is the constant the masked value is
   compared against.  */
HOST_WIDE_INT mask1 = INTVAL (operands[2]);
HOST_WIDE_INT mask2 = INTVAL (operands[3]);
/* Operand 7 is the shift count; operands 8 and 9 are the two constants
   shifted right by that count.  */
int trailing_shift = COMMON_TRAILING_ZEROS (mask1, mask2);
operands[7] = GEN_INT (trailing_shift);
operands[8] = GEN_INT (mask1 >> trailing_shift);
operands[9] = GEN_INT (mask2 >> trailing_shift);
}
[(set_attr "type" "branch")])
(define_insn_and_split "*branch<ANYI:mode>_shiftedmask_equals_zero"
[(set (pc)
(if_then_else (match_operator 1 "equality_operator"

View file

@ -28,10 +28,22 @@ void f4(long long a)
g();
}
/* Mask 0x1ff800 and constant 0x11c800 both have 11 trailing zero bits;
   shifted right by 11 they are 1023 and 569.  Expected code: srli by 11,
   andi 1023, li 569, then the branch.  */
void f5(long long a)
{
if ((a & 0x1ff800) == 0x11c800)
g();
}
/* Mask 0x33f800 has 11 trailing zero bits and constant 0x110000 has 16, so
   the common count is 11; shifted right by 11 they are 1663 and 544.
   Expected code: srli by 11, andi 1663, li 544, then the branch.  */
void f6(long long a)
{
if ((a & 0x33f800) == 0x110000)
g();
}
/* { dg-final { scan-assembler-times "slli\t" 2 } } */
/* { dg-final { scan-assembler-times "srli\t" 3 } } */
/* { dg-final { scan-assembler-times "andi\t" 1 } } */
/* { dg-final { scan-assembler-times "\tli\t" 1 } } */
/* { dg-final { scan-assembler-times "srli\t" 5 } } */
/* { dg-final { scan-assembler-times "andi\t" 3 } } */
/* { dg-final { scan-assembler-times "\tli\t" 3 } } */
/* { dg-final { scan-assembler-not "addi\t" } } */
/* { dg-final { scan-assembler-not "and\t" } } */