ARC: Improved DImode rotates and right shifts by one bit.
This patch improves the code generated for DImode right shifts (both
arithmetic and logical) by a single bit, and also for DImode rotates
(both left and right) by a single bit.  The approach is similar to the
recently added DImode left shift by a single bit patch, but also builds
upon the x86's UNSPEC carry flag representation:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html

The benefits can be seen from the four new test cases:

long long ashr(long long x) { return x >> 1; }

Before:
ashr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        asr_s   r1,r1,1

After:
ashr:   asr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long lshr(unsigned long long x) { return x >> 1; }

Before:
lshr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        lsr_s   r1,r1

After:
lshr:   lsr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }

Before:
rotl:   lsr     r12,r1,31
        lsr     r2,r0,31
        asl_s   r3,r0,1
        asl_s   r1,r1,1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotl:   add.f   r0,r0,r0
        adc.f   r1,r1,r1
        j_s.d   [blink]
        add.cs  r0,r0,1

unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }

Before:
rotr:   asl     r12,r1,31
        asl     r2,r0,31
        lsr_s   r3,r0
        lsr_s   r1,r1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotr:   asr.f   0,r0
        rrc.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

On CPUs without a barrel shifter the improvements are even better.

2023-11-13  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
        * config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
        represents the carry flag being set if the operand is non-zero.
        (adc_f): New define_insn representing adc with updated flags.
        (ashrdi3): New define_expand that only handles shifts by 1.
        (ashrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrdi3): New define_expand that only handles shifts by 1.
        (lshrdi3_cnt1): New pre-reload define_insn_and_split.
        (rrcsi2): New define_insn for rrc (SImode rotate right through carry).
        (rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
        (rotldi3): New define_expand that only handles rotates by 1.
        (rotldi3_cnt1): New pre-reload define_insn_and_split.
        (rotrdi3): New define_expand that only handles rotates by 1.
        (rotrdi3_cnt1): New pre-reload define_insn_and_split.
        (lshrsi3_cnt1_carry): New define_insn for lsr.f.
        (ashrsi3_cnt1_carry): New define_insn for asr.f.
        (btst_0_carry): New define_insn for asr.f without result.

gcc/testsuite/ChangeLog
        * gcc.target/arc/ashrdi3-1.c: New test case.
        * gcc.target/arc/lshrdi3-1.c: Likewise.
        * gcc.target/arc/rotldi3-1.c: Likewise.
        * gcc.target/arc/rotrdi3-1.c: Likewise.
This commit is contained in:
parent
e9d59a2a5a
commit
b51bfee1be
5 changed files with 261 additions and 0 deletions
|
@ -136,6 +136,7 @@
|
|||
UNSPEC_ARC_VMAC2HU
|
||||
UNSPEC_ARC_VMPY2H
|
||||
UNSPEC_ARC_VMPY2HU
|
||||
UNSPEC_ARC_CC_NEZ
|
||||
|
||||
VUNSPEC_ARC_RTIE
|
||||
VUNSPEC_ARC_SYNC
|
||||
|
@ -2789,6 +2790,31 @@ archs4x, archs4xd"
|
|||
(set_attr "type" "cc_arith")
|
||||
(set_attr "length" "4,4,4,4,8,8")])
|
||||
|
||||
;; Add with carry that also updates the carry flag (emitted as "adc.f").
;; Operand 0 receives operand 1 + operand 2 + the incoming carry, where the
;; incoming carry is written as (ltu (reg:CC_C CC_REG) (const_int 0)).
;; The new CC_C value is described as a compare of the zero-extended SImode
;; sum against (incoming carry + zero-extended operand 1) computed in DImode.
;; The insn sets CC_REG ("cond" is "set"), is not predicable and is 4 bytes.
(define_insn "adc_f"
|
||||
[(set (reg:CC_C CC_REG)
|
||||
(compare:CC_C
|
||||
(zero_extend:DI
|
||||
(plus:SI
|
||||
(plus:SI
|
||||
(ltu:SI (reg:CC_C CC_REG) (const_int 0))
|
||||
(match_operand:SI 1 "register_operand" "%r"))
|
||||
(match_operand:SI 2 "register_operand" "r")))
|
||||
(plus:DI
|
||||
(ltu:DI (reg:CC_C CC_REG) (const_int 0))
|
||||
(zero_extend:DI (match_dup 1)))))
|
||||
(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(plus:SI
|
||||
(plus:SI
|
||||
(ltu:SI (reg:CC_C CC_REG) (const_int 0))
|
||||
(match_dup 1))
|
||||
(match_dup 2)))]
|
||||
""
|
||||
"adc.f\\t%0,%1,%2"
|
||||
[(set_attr "cond" "set")
|
||||
(set_attr "predicable" "no")
|
||||
(set_attr "type" "cc_arith")
|
||||
(set_attr "length" "4")])
|
||||
|
||||
; combiner-splitter cmp / scc -> cmp / adc
|
||||
(define_split
|
||||
[(set (match_operand:SI 0 "dest_reg_operand" "")
|
||||
|
@ -3529,6 +3555,68 @@ archs4x, archs4xd"
|
|||
""
|
||||
[(set_attr "length" "8")])
|
||||
|
||||
;; Expander for DImode arithmetic right shift.
;; Only a constant shift count of exactly one is accepted (the resulting
;; insn is matched by *ashrdi3_cnt1); for any other count the expander FAILs.
;; The generated parallel also clobbers CC_REG.
(define_expand "ashrdi3"
|
||||
[(parallel
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(ashiftrt:DI (match_operand:DI 1 "register_operand")
|
||||
(match_operand:QI 2 "const_int_operand")))
|
||||
(clobber (reg:CC CC_REG))])]
|
||||
""
|
||||
{
|
||||
if (operands[2] != const1_rtx)
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;; DImode arithmetic right shift by one bit.
;; Matched under arc_pre_reload_split () (helper defined outside this
;; excerpt; the ChangeLog describes this as a pre-reload pattern) and split
;; whenever it matches ("&& 1") into two SImode insns:
;;   asr.f hi  (ashrsi3_cnt1_carry): arithmetic shift of the high word,
;;             with the carry set from bit 0 of the original high word;
;;   rrc   lo  (rrcsi2): logical shift of the low word, with that carry
;;             inserted at bit 31.
|
||||
(define_insn_and_split "*ashrdi3_cnt1"
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(ashiftrt:DI (match_operand:DI 1 "register_operand")
|
||||
(const_int 1)))
|
||||
(clobber (reg:CC CC_REG))]
|
||||
"arc_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
emit_insn (gen_ashrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
|
||||
gen_highpart (SImode, operands[1])));
|
||||
emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
|
||||
gen_lowpart (SImode, operands[1])));
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "length" "8")])
|
||||
|
||||
;; Expander for DImode logical right shift.
;; Only a constant shift count of exactly one is accepted (the resulting
;; insn is matched by *lshrdi3_cnt1); for any other count the expander FAILs.
;; The generated parallel also clobbers CC_REG.
(define_expand "lshrdi3"
|
||||
[(parallel
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(lshiftrt:DI (match_operand:DI 1 "register_operand")
|
||||
(match_operand:QI 2 "const_int_operand")))
|
||||
(clobber (reg:CC CC_REG))])]
|
||||
""
|
||||
{
|
||||
if (operands[2] != const1_rtx)
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;; DImode logical right shift by one bit.
;; Matched under arc_pre_reload_split () (helper defined outside this
;; excerpt; the ChangeLog describes this as a pre-reload pattern) and split
;; whenever it matches ("&& 1") into two SImode insns:
;;   lsr.f hi  (lshrsi3_cnt1_carry): logical shift of the high word,
;;             with the carry set from bit 0 of the original high word;
;;   rrc   lo  (rrcsi2): logical shift of the low word, with that carry
;;             inserted at bit 31.
|
||||
(define_insn_and_split "*lshrdi3_cnt1"
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(lshiftrt:DI (match_operand:DI 1 "register_operand")
|
||||
(const_int 1)))
|
||||
(clobber (reg:CC CC_REG))]
|
||||
"arc_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
emit_insn (gen_lshrsi3_cnt1_carry (gen_highpart (SImode, operands[0]),
|
||||
gen_highpart (SImode, operands[1])));
|
||||
emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]),
|
||||
gen_lowpart (SImode, operands[1])));
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "length" "8")])
|
||||
|
||||
;; Rotate instructions.
|
||||
|
||||
(define_insn "rotrsi3_insn"
|
||||
|
@ -3570,6 +3658,103 @@ archs4x, archs4xd"
|
|||
}
|
||||
})
|
||||
|
||||
;; Rotate through carry flag
|
||||
|
||||
;; SImode rotate right through carry, emitted as "rrc".
;; Operand 0 = (operand 1 >> 1, logical) + (incoming carry << 31), where the
;; incoming carry is written as (ltu (reg:CC_C CC_REG) (const_int 0)).
;; This variant has no set of CC_REG in its pattern; see rrcsi2_carry for
;; the flag-setting form.
(define_insn "rrcsi2"
|
||||
[(set (match_operand:SI 0 "dest_reg_operand" "=r")
|
||||
(plus:SI
|
||||
(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
|
||||
(const_int 1))
|
||||
(ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
|
||||
(const_int 31))))]
|
||||
""
|
||||
"rrc\\t%0,%1"
|
||||
[(set_attr "type" "shift")
|
||||
(set_attr "predicable" "no")
|
||||
(set_attr "length" "4")])
|
||||
|
||||
;; SImode rotate right through carry that also updates the carry flag,
;; emitted as "rrc.f".
;; Operand 0 = (operand 1 >> 1, logical) + (incoming carry << 31).
;; In parallel, CC_C is set via UNSPEC_ARC_CC_NEZ applied to
;; (operand 1 & 1), i.e. the new carry reflects bit 0 of the input
;; (the ChangeLog describes this unspec as "carry set if the operand is
;; non-zero").
(define_insn "rrcsi2_carry"
|
||||
[(set (reg:CC_C CC_REG)
|
||||
(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
|
||||
(const_int 1))] UNSPEC_ARC_CC_NEZ))
|
||||
(set (match_operand:SI 0 "dest_reg_operand" "=r")
|
||||
(plus:SI
|
||||
(lshiftrt:SI (match_dup 1) (const_int 1))
|
||||
(ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
|
||||
(const_int 31))))]
|
||||
""
|
||||
"rrc.f\\t%0,%1"
|
||||
[(set_attr "type" "shift")
|
||||
(set_attr "predicable" "no")
|
||||
(set_attr "length" "4")])
|
||||
|
||||
;; DImode Rotate instructions
|
||||
|
||||
;; Expander for DImode rotate left.
;; Only a constant rotate count of exactly one is accepted (the resulting
;; insn is matched by *rotldi3_cnt1); for any other count the expander FAILs.
;; The generated parallel also clobbers CC_REG.
(define_expand "rotldi3"
|
||||
[(parallel
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(rotate:DI (match_operand:DI 1 "register_operand")
|
||||
(match_operand:QI 2 "const_int_operand")))
|
||||
(clobber (reg:CC CC_REG))])]
|
||||
""
|
||||
{
|
||||
if (operands[2] != const1_rtx)
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;; DImode rotate left by one bit.
;; Matched under arc_pre_reload_split () (helper defined outside this
;; excerpt; the ChangeLog describes this as a pre-reload pattern) and split
;; whenever it matches ("&& 1") into three SImode insns:
;;   add.f lo,lo,lo  (add_f): double the low word;
;;   adc.f hi,hi,hi  (adc_f): double the high word and add the carry;
;;   adc   lo,lo,0   (adc):   add the carry from adc.f into the low word.
;; NOTE(review): add_f and adc are defined outside this excerpt; presumably
;; add.f leaves the old bit 31 of the low word in the carry -- confirm there.
;; The rotldi3-1.c test expects the last insn to be printed as
;; "add.cs r0,r0,1".
|
||||
(define_insn_and_split "*rotldi3_cnt1"
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(rotate:DI (match_operand:DI 1 "register_operand")
|
||||
(const_int 1)))
|
||||
(clobber (reg:CC CC_REG))]
|
||||
"arc_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rtx lo0 = gen_lowpart (SImode, operands[0]);
|
||||
rtx lo1 = gen_lowpart (SImode, operands[1]);
|
||||
rtx hi1 = gen_highpart (SImode, operands[1]);
|
||||
emit_insn (gen_add_f (lo0, lo1, lo1));
|
||||
emit_insn (gen_adc_f (gen_highpart (SImode, operands[0]), hi1, hi1));
|
||||
emit_insn (gen_adc (lo0, lo0, const0_rtx));
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "length" "12")])
|
||||
|
||||
;; Expander for DImode rotate right.
;; Only a constant rotate count of exactly one is accepted (the resulting
;; insn is matched by *rotrdi3_cnt1); for any other count the expander FAILs.
;; The generated parallel also clobbers CC_REG.
(define_expand "rotrdi3"
|
||||
[(parallel
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(rotatert:DI (match_operand:DI 1 "register_operand")
|
||||
(match_operand:QI 2 "const_int_operand")))
|
||||
(clobber (reg:CC CC_REG))])]
|
||||
""
|
||||
{
|
||||
if (operands[2] != const1_rtx)
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;; DImode rotate right by one bit.
;; Matched under arc_pre_reload_split () (helper defined outside this
;; excerpt; the ChangeLog describes this as a pre-reload pattern) and split
;; whenever it matches ("&& 1") into three SImode insns:
;;   asr.f 0,lo  (btst_0_carry): set the carry from bit 0 of the low input
;;               word without writing a register result;
;;   rrc.f hi    (rrcsi2_carry): shift the high word right, inserting that
;;               carry at bit 31 and setting the carry from bit 0 of the
;;               original high word;
;;   rrc   lo    (rrcsi2): shift the low word right, inserting the carry
;;               from rrc.f at bit 31.
|
||||
(define_insn_and_split "*rotrdi3_cnt1"
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(rotatert:DI (match_operand:DI 1 "register_operand")
|
||||
(const_int 1)))
|
||||
(clobber (reg:CC CC_REG))]
|
||||
"arc_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(const_int 0)]
|
||||
{
|
||||
rtx lo = gen_lowpart (SImode, operands[1]);
|
||||
emit_insn (gen_btst_0_carry (lo));
|
||||
emit_insn (gen_rrcsi2_carry (gen_highpart (SImode, operands[0]),
|
||||
gen_highpart (SImode, operands[1])));
|
||||
emit_insn (gen_rrcsi2 (gen_lowpart (SImode, operands[0]), lo));
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "length" "12")])
|
||||
|
||||
;; Compare / branch instructions.
|
||||
|
||||
(define_expand "cbranchsi4"
|
||||
|
@ -6009,6 +6194,18 @@ archs4x, archs4xd"
|
|||
(set_attr "iscompact" "maybe,false")
|
||||
(set_attr "predicable" "no,no")])
|
||||
|
||||
;; SImode logical right shift by one that also sets the carry flag,
;; emitted as "lsr.f".
;; Operand 0 = operand 1 >> 1 (logical); CC_C is set via UNSPEC_ARC_CC_NEZ
;; applied to (operand 1 & 1), i.e. the carry reflects the bit shifted out.
;; Used as the high-word step of *lshrdi3_cnt1.
(define_insn "lshrsi3_cnt1_carry"
|
||||
[(set (reg:CC_C CC_REG)
|
||||
(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
|
||||
(const_int 1))] UNSPEC_ARC_CC_NEZ))
|
||||
(set (match_operand:SI 0 "dest_reg_operand" "=r")
|
||||
(lshiftrt:SI (match_dup 1) (const_int 1)))]
|
||||
""
|
||||
"lsr.f\\t%0,%1"
|
||||
[(set_attr "type" "unary")
|
||||
(set_attr "length" "4")
|
||||
(set_attr "predicable" "no")])
|
||||
|
||||
(define_insn "ashrsi3_cnt1"
|
||||
[(set (match_operand:SI 0 "dest_reg_operand" "=q,w")
|
||||
(ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
|
||||
|
@ -6019,6 +6216,28 @@ archs4x, archs4xd"
|
|||
(set_attr "iscompact" "maybe,false")
|
||||
(set_attr "predicable" "no,no")])
|
||||
|
||||
;; SImode arithmetic right shift by one that also sets the carry flag,
;; emitted as "asr.f".
;; Operand 0 = operand 1 >> 1 (arithmetic); CC_C is set via
;; UNSPEC_ARC_CC_NEZ applied to (operand 1 & 1), i.e. the carry reflects
;; the bit shifted out.  Used as the high-word step of *ashrdi3_cnt1.
(define_insn "ashrsi3_cnt1_carry"
|
||||
[(set (reg:CC_C CC_REG)
|
||||
(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
|
||||
(const_int 1))] UNSPEC_ARC_CC_NEZ))
|
||||
(set (match_operand:SI 0 "dest_reg_operand" "=r")
|
||||
(ashiftrt:SI (match_dup 1) (const_int 1)))]
|
||||
""
|
||||
"asr.f\\t%0,%1"
|
||||
[(set_attr "type" "unary")
|
||||
(set_attr "length" "4")
|
||||
(set_attr "predicable" "no")])
|
||||
|
||||
;; Copy bit 0 of operand 0 into the carry flag without producing a register
;; result: CC_C is set via UNSPEC_ARC_CC_NEZ applied to (operand 0 & 1).
;; Emitted as "asr.f 0,%0", i.e. asr.f with a literal 0 in the destination
;; position (the ChangeLog calls this "asr.f without result").
;; Used as the first step of *rotrdi3_cnt1.
(define_insn "btst_0_carry"
|
||||
[(set (reg:CC_C CC_REG)
|
||||
(unspec:CC_C [(and:SI (match_operand:SI 0 "register_operand" "r")
|
||||
(const_int 1))] UNSPEC_ARC_CC_NEZ))]
|
||||
""
|
||||
"asr.f\\t0,%0"
|
||||
[(set_attr "type" "unary")
|
||||
(set_attr "length" "4")
|
||||
(set_attr "predicable" "no")])
|
||||
|
||||
(define_peephole2
|
||||
[(set (match_operand:SI 0 "register_operand" "")
|
||||
(zero_extract:SI (match_dup 0)
|
||||
|
|
10
gcc/testsuite/gcc.target/arc/ashrdi3-1.c
Normal file
10
gcc/testsuite/gcc.target/arc/ashrdi3-1.c
Normal file
|
@ -0,0 +1,10 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
/* Signed 64-bit shift right by one bit.  The scan-assembler directives in
   this test expect it to compile to asr.f on r1 followed by rrc on r0.  */
long long foo(long long x)
|
||||
{
|
||||
return x >> 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "asr.f\\s+r1,r1" } } */
|
||||
/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
|
10
gcc/testsuite/gcc.target/arc/lshrdi3-1.c
Normal file
10
gcc/testsuite/gcc.target/arc/lshrdi3-1.c
Normal file
|
@ -0,0 +1,10 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
/* Unsigned 64-bit shift right by one bit.  The scan-assembler directives in
   this test expect it to compile to lsr.f on r1 followed by rrc on r0.  */
unsigned long long foo(unsigned long long x)
|
||||
{
|
||||
return x >> 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "lsr.f\\s+r1,r1" } } */
|
||||
/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
|
11
gcc/testsuite/gcc.target/arc/rotldi3-1.c
Normal file
11
gcc/testsuite/gcc.target/arc/rotldi3-1.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
/* 64-bit rotate left by one bit, written as a shift/or idiom.  The
   scan-assembler directives in this test expect add.f r0,r0,r0, then
   adc.f r1,r1,r1, then add.cs r0,r0,1.  */
unsigned long long foo(unsigned long long x)
|
||||
{
|
||||
return (x << 1) | (x >> 63);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */
|
||||
/* { dg-final { scan-assembler "adc.f\\s+r1,r1,r1" } } */
|
||||
/* { dg-final { scan-assembler "add.cs\\s+r0,r0,1" } } */
|
11
gcc/testsuite/gcc.target/arc/rotrdi3-1.c
Normal file
11
gcc/testsuite/gcc.target/arc/rotrdi3-1.c
Normal file
|
@ -0,0 +1,11 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
/* 64-bit rotate right by one bit, written as a shift/or idiom.  The
   scan-assembler directives in this test expect asr.f 0,r0, then
   rrc.f r1,r1, then rrc r0,r0.  */
unsigned long long foo(unsigned long long x)
|
||||
{
|
||||
return (x >> 1) | (x << 63);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "asr.f\\s+0,r0" } } */
|
||||
/* { dg-final { scan-assembler "rrc.f\\s+r1,r1" } } */
|
||||
/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */
|
Loading…
Add table
Reference in a new issue