[to-be-committed][v3][RISC-V] Handle bit manipulation of SImode values

Last patch in this round of bitmanip work...  At least I think I'm going to
pause here and switch gears to other projects that need attention 🙂

This patch introduces the ability to generate bitmanip instructions for rv64
when operating on SI objects when we know something about the range of the bit
position (due to masking of the position).

I've got to note that the (7-pos % 8) bit position form was discovered by RAU in
500.perl.  I took that and expanded it to the simple (pos & mask) form as well
as covering bset, binv and bclr.

As far as the implementation is concerned....

This turns the recently added define_splits into define_insn_and_split
constructs.  This allows combine to "see" enough RTL to realize a sign
extension is unnecessary.  Otherwise we get undesirable sign extensions for the
new testcases.

Second it adds new patterns for the logical operations.  Two patterns for
IOR/XOR and two patterns for AND.

I think a key concept to keep in mind is that once we determine a Zbs operation
is safe to perform on a SI value, we can rewrite the RTL in 64bit form.  If we
were ever to try and use range information at expand time for this stuff (and
we probably should investigate that), that's the path I'd suggest.

This is notably cleaner than my original implementation which actually kept the
more complex RTL form through final and emitted 2/3 instructions (mask the bit
position, then the bset/bclr/binv).

Tested in my tester, but waiting for pre-commit CI to report back before taking
further action.

gcc/
	* config/riscv/bitmanip.md (bset splitters): Turn into define_insn_and_splits.
	Don't depend on combine splitting the "andn with constant" form.
	(bset, binv, bclr with masked bit position): New patterns.

gcc/testsuite
	* gcc.target/riscv/binv-for-simode-1.c: New test.
	* gcc.target/riscv/bset-for-simode-1.c: New test.
	* gcc.target/riscv/bclr-for-simode-1.c: New test.
This commit is contained in:
Jeff Law 2024-07-06 12:57:59 -06:00
parent bb16e3179e
commit 273f16a125
4 changed files with 192 additions and 16 deletions

View file

@ -615,37 +615,140 @@
;; shift constant. With the limited range we know the SImode sign ;; shift constant. With the limited range we know the SImode sign
;; bit is never set, thus we can treat this as zero extending and ;; bit is never set, thus we can treat this as zero extending and
;; generate the bsetdi_2 pattern. ;; generate the bsetdi_2 pattern.
(define_split (define_insn_and_split ""
[(set (match_operand:DI 0 "register_operand") [(set (match_operand:DI 0 "register_operand" "=r")
(any_extend:DI (any_extend:DI
(ashift:SI (const_int 1) (ashift:SI (const_int 1)
(subreg:QI (subreg:QI
(and:DI (not:DI (match_operand:DI 1 "register_operand")) (and:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
(match_operand 2 "const_int_operand")) 0)))) (match_operand 2 "const_int_operand")) 0))))
(clobber (match_operand:DI 3 "register_operand"))] (clobber (match_scratch:X 3 "=&r"))]
"TARGET_64BIT "TARGET_64BIT
&& TARGET_ZBS && TARGET_ZBS
&& (TARGET_ZBB || TARGET_ZBKB) && (TARGET_ZBB || TARGET_ZBKB)
&& (INTVAL (operands[2]) & 0x1f) != 0x1f" && (INTVAL (operands[2]) & 0x1f) != 0x1f"
[(set (match_dup 0) (and:DI (not:DI (match_dup 1)) (match_dup 2))) "#"
(set (match_dup 0) (zero_extend:DI (ashift:SI "&& reload_completed"
(const_int 1) [(set (match_dup 3) (match_dup 2))
(subreg:QI (match_dup 0) 0))))]) (set (match_dup 3) (and:DI (not:DI (match_dup 1)) (match_dup 3)))
(set (match_dup 0) (zero_extend:DI
(ashift:SI (const_int 1) (match_dup 4))))]
{ operands[4] = gen_lowpart (QImode, operands[3]); }
[(set_attr "type" "bitmanip")])
(define_split (define_insn_and_split ""
[(set (match_operand:DI 0 "register_operand") [(set (match_operand:DI 0 "register_operand" "=r")
(any_extend:DI (any_extend:DI
(ashift:SI (const_int 1) (ashift:SI (const_int 1)
(subreg:QI (subreg:QI
(and:DI (match_operand:DI 1 "register_operand") (and:DI (match_operand:DI 1 "register_operand" "r")
(match_operand 2 "const_int_operand")) 0))))] (match_operand 2 "const_int_operand")) 0))))]
"TARGET_64BIT "TARGET_64BIT
&& TARGET_ZBS && TARGET_ZBS
&& (INTVAL (operands[2]) & 0x1f) != 0x1f" && (INTVAL (operands[2]) & 0x1f) != 0x1f"
[(set (match_dup 0) (and:DI (match_dup 1) (match_dup 2))) "#"
(set (match_dup 0) (zero_extend:DI (ashift:SI "&& 1"
(const_int 1) [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 2)))
(subreg:QI (match_dup 0) 0))))]) (set (match_dup 0) (zero_extend:DI (ashift:SI
(const_int 1)
(subreg:QI (match_dup 0) 0))))]
{ }
[(set_attr "type" "bitmanip")])
;; Similarly two patterns for IOR/XOR generating bset/binv to
;; manipulate a bit in a register
;; bset/binv for an SImode single-bit value whose shift count has the
;; form (~operand1 & constant).  The single bit is extended to DImode and
;; IOR/XORed into operand 3.  The insn condition rejects constants whose
;; low five bits are all ones, so the count can never be 31 and the SImode
;; sign bit is never set; the operation can therefore be rewritten in
;; DImode form.  ZBB or ZBKB is required in addition to ZBS, presumably
;; for the and-not emitted by the split.
(define_insn_and_split ""
[(set (match_operand:DI 0 "register_operand" "=r")
(any_or:DI
(any_extend:DI
(ashift:SI
(const_int 1)
(subreg:QI
(and:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
(match_operand 2 "const_int_operand")) 0)))
(match_operand:DI 3 "register_operand" "r")))
(clobber (match_scratch:X 4 "=&r"))]
"TARGET_64BIT
&& TARGET_ZBS
&& (TARGET_ZBB || TARGET_ZBKB)
&& (INTVAL (operands[2]) & 0x1f) != 0x1f"
"#"
"&& reload_completed"
;; Load the constant into the scratch, then compute ~operand1 & constant
;; there.  The NOT must apply to operand 1, not to the scratch holding the
;; constant, so that the split computes the same bit position the insn
;; pattern matched.
[(set (match_dup 4) (match_dup 2))
(set (match_dup 4) (and:DI (not:DI (match_dup 1)) (match_dup 4)))
(set (match_dup 0) (any_or:DI (ashift:DI (const_int 1) (match_dup 5)) (match_dup 3)))]
;; Operand 5 is the QImode low part of the scratch, used as the bit position.
{ operands[5] = gen_lowpart (QImode, operands[4]); }
[(set_attr "type" "bitmanip")])
;; bset/binv for an SImode single-bit value whose shift count has the
;; plain form (operand1 & constant).  The single bit is extended to DImode
;; and IOR/XORed into operand 3.  The insn condition rejects constants
;; whose low five bits are all ones, so the count can never be 31.
(define_insn_and_split ""
[(set (match_operand:DI 0 "register_operand" "=r")
(any_or:DI
(any_extend:DI
(ashift:SI
(const_int 1)
(subreg:QI
(and:DI (match_operand:DI 1 "register_operand" "r")
(match_operand 2 "const_int_operand")) 0)))
(match_operand:DI 3 "register_operand" "r")))
(clobber (match_scratch:X 4 "=&r"))]
"TARGET_64BIT
&& TARGET_ZBS
&& (INTVAL (operands[2]) & 0x1f) != 0x1f"
"#"
"&& reload_completed"
;; Mask the position into the scratch, then do the DImode single-bit
;; IOR/XOR with the low byte of the scratch as the bit position.
;; NOTE(review): this split spells the QImode subreg directly, while the
;; sibling patterns use gen_lowpart in the preparation code -- confirm
;; that the difference is intentional.
[(set (match_dup 4) (and:DI (match_dup 1) (match_dup 2)))
(set (match_dup 0) (any_or:DI (ashift:DI (const_int 1) (subreg:QI (match_dup 4) 0)) (match_dup 3)))]
{ }
[(set_attr "type" "bitmanip")])
;; Similarly two patterns for AND generating bclr to
;; manipulate a bit in a register
;; bclr for an SImode single-bit value whose shift count has the form
;; (~operand1 & constant).  The inverted, extended single bit is ANDed
;; with operand 3.  The insn condition rejects constants whose low five
;; bits are all ones, so the count can never be 31.  ZBB or ZBKB is
;; required in addition to ZBS, presumably for the and-not emitted by
;; the split.
(define_insn_and_split ""
[(set (match_operand:DI 0 "register_operand" "=r")
(and:DI
(not:DI
(any_extend:DI
(ashift:SI
(const_int 1)
(subreg:QI
(and:DI (not:DI (match_operand:DI 1 "register_operand" "r"))
(match_operand 2 "const_int_operand")) 0))))
(match_operand:DI 3 "register_operand" "r")))
(clobber (match_scratch:X 4 "=&r"))]
"TARGET_64BIT
&& TARGET_ZBS
&& (TARGET_ZBB || TARGET_ZBKB)
&& (INTVAL (operands[2]) & 0x1f) != 0x1f"
"#"
"&& reload_completed"
;; Load the constant into the scratch, compute ~operand1 & constant there,
;; then AND operand 3 with -2 rotated by that position (a mask with only
;; the selected bit clear).
[(set (match_dup 4) (match_dup 2))
(set (match_dup 4) (and:DI (not:DI (match_dup 1)) (match_dup 4)))
(set (match_dup 0) (and:DI (rotate:DI (const_int -2) (match_dup 5)) (match_dup 3)))]
;; Operand 5 is the QImode low part of the scratch, used as the bit position.
{ operands[5] = gen_lowpart (QImode, operands[4]); }
[(set_attr "type" "bitmanip")])
;; bclr for an SImode single-bit value whose shift count has the plain
;; form (operand1 & constant).  The inverted, extended single bit is ANDed
;; with operand 3.  The insn condition rejects constants whose low five
;; bits are all ones, so the count can never be 31.
(define_insn_and_split ""
[(set (match_operand:DI 0 "register_operand" "=r")
(and:DI
(not:DI
(any_extend:DI
(ashift:SI
(const_int 1)
(subreg:QI
(and:DI (match_operand:DI 1 "register_operand" "r")
(match_operand 2 "const_int_operand")) 0))))
(match_operand:DI 3 "register_operand" "r")))
(clobber (match_scratch:X 4 "=&r"))]
"TARGET_64BIT
&& TARGET_ZBS
&& (INTVAL (operands[2]) & 0x1f) != 0x1f"
"#"
"&& reload_completed"
;; Mask the position into the scratch, then AND operand 3 with -2 rotated
;; by that position (a mask with only the selected bit clear).
[(set (match_dup 4) (and:DI (match_dup 1) (match_dup 2)))
(set (match_dup 0) (and:DI (rotate:DI (const_int -2) (match_dup 5)) (match_dup 3)))]
;; Operand 5 is the QImode low part of the scratch, used as the bit position.
{ operands[5] = gen_lowpart (QImode, operands[4]); }
[(set_attr "type" "bitmanip")])
(define_insn "*bset<mode>_1_mask" (define_insn "*bset<mode>_1_mask"
[(set (match_operand:X 0 "register_operand" "=r") [(set (match_operand:X 0 "register_operand" "=r")

View file

@ -0,0 +1,25 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc_zbb_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
/* Local definition of uint32_t; this test includes no headers.  */
typedef unsigned int uint32_t;
/* Return X with one bit cleared.  The bit index is POS & 0xf, so it is
   always in 0..15 and the shift never reaches bit 31.  This is the simple
   (pos & mask) bit-position form.  */
uint32_t foo(uint32_t pos, uint32_t x)
{
return x & ~(1 <<( pos & 0xf));
}
/* Repeated typedef of uint32_t, naming the same type as before.  */
typedef unsigned int uint32_t;
/* Return X with one bit cleared.  The bit index is 7 - (POS % 8), so it
   is always in 0..7.  This is the (7-pos % 8) bit-position form.  */
uint32_t foo2(uint32_t pos, uint32_t x)
{
return x & ~(1 <<(7-(pos) % 8));
}
/* { dg-final { scan-assembler-not "sll\t" } } */
/* { dg-final { scan-assembler-times "bclr\t" 2 } } */
/* { dg-final { scan-assembler-times "andi\t" 1 } } */
/* { dg-final { scan-assembler-times "andn\t" 1 } } */
/* { dg-final { scan-assembler-times "ret" 2 } } */

View file

@ -0,0 +1,24 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc_zbb_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
/* Local definition of uint32_t; this test includes no headers.  */
typedef unsigned int uint32_t;
/* Return X with one bit inverted.  The bit index is POS & 0xf, so it is
   always in 0..15 and the shift never reaches bit 31.  This is the simple
   (pos & mask) bit-position form.  */
uint32_t foo(uint32_t pos, uint32_t x)
{
return x ^ (1 <<( pos & 0xf));
}
/* Repeated typedef of uint32_t, naming the same type as before.  */
typedef unsigned int uint32_t;
/* Return X with one bit inverted.  The bit index is 7 - (POS % 8), so it
   is always in 0..7.  This is the (7-pos % 8) bit-position form.  */
uint32_t foo2(uint32_t pos, uint32_t x)
{
return x ^ (1 <<(7-(pos) % 8));
}
/* { dg-final { scan-assembler-not "sll\t" } } */
/* { dg-final { scan-assembler-times "binv\t" 2 } } */
/* { dg-final { scan-assembler-times "andi\t" 1 } } */
/* { dg-final { scan-assembler-times "andn\t" 1 } } */
/* { dg-final { scan-assembler-times "ret" 2 } } */

View file

@ -0,0 +1,24 @@
/* { dg-do compile } */
/* { dg-options "-march=rv64gc_zbb_zbs -mabi=lp64" } */
/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
/* Local definition of uint32_t; this test includes no headers.  */
typedef unsigned int uint32_t;
/* Return X with one bit set.  The bit index is POS & 0xf, so it is
   always in 0..15 and the shift never reaches bit 31.  This is the simple
   (pos & mask) bit-position form.  */
uint32_t foo(uint32_t pos, uint32_t x)
{
return x | (1 <<( pos & 0xf));
}
/* Repeated typedef of uint32_t, naming the same type as before.  */
typedef unsigned int uint32_t;
/* Return X with one bit set.  The bit index is 7 - (POS % 8), so it is
   always in 0..7.  This is the (7-pos % 8) bit-position form.  */
uint32_t foo2(uint32_t pos, uint32_t x)
{
return x | (1 <<(7-(pos) % 8));
}
/* { dg-final { scan-assembler-not "sll\t" } } */
/* { dg-final { scan-assembler-times "bset\t" 2 } } */
/* { dg-final { scan-assembler-times "andi\t" 1 } } */
/* { dg-final { scan-assembler-times "andn\t" 1 } } */
/* { dg-final { scan-assembler-times "ret" 2 } } */