ARC: Improved DImode rotates and right shifts by one bit.

This patch improves the code generated for DImode right shifts (both
arithmetic and logical) by a single bit, and also for DImode rotates
(both left and right) by a single bit.  The approach is similar to the
recently added patch for DImode left shifts by a single bit, but it
additionally builds upon the x86 backend's UNSPEC representation of the
carry flag:
https://gcc.gnu.org/pipermail/gcc-patches/2023-October/632169.html

The benefits can be seen from the four new test cases:

long long ashr(long long x) { return x >> 1; }

Before:
ashr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        asr_s   r1,r1,1

After:
ashr:   asr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long lshr(unsigned long long x) { return x >> 1; }

Before:
lshr:   asl     r2,r1,31
        lsr_s   r0,r0
        or_s    r0,r0,r2
        j_s.d   [blink]
        lsr_s   r1,r1

After:
lshr:   lsr.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

unsigned long long rotl(unsigned long long x) { return (x<<1) | (x>>63); }

Before:
rotl:   lsr     r12,r1,31
        lsr     r2,r0,31
        asl_s   r3,r0,1
        asl_s   r1,r1,1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotl:   add.f   r0,r0,r0
        adc.f   r1,r1,r1
        j_s.d   [blink]
        add.cs  r0,r0,1

unsigned long long rotr(unsigned long long x) { return (x>>1) | (x<<63); }

Before:
rotr:   asl     r12,r1,31
        asl     r2,r0,31
        lsr_s   r3,r0
        lsr_s   r1,r1
        or      r0,r12,r3
        j_s.d   [blink]
        or_s    r1,r1,r2

After:
rotr:   asr.f   0,r0
        rrc.f   r1,r1
        j_s.d   [blink]
        rrc     r0,r0

On CPUs without a barrel shifter the improvements are even better.

2023-11-13  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
	* config/arc/arc.md (UNSPEC_ARC_CC_NEZ): New UNSPEC that
	represents the carry flag being set if the operand is non-zero.
	(adc_f): New define_insn representing adc with updated flags.
	(ashrdi3): New define_expand that only handles shifts by 1.
	(ashrdi3_cnt1): New pre-reload define_insn_and_split.
	(lshrdi3): New define_expand that only handles shifts by 1.
	(lshrdi3_cnt1): New pre-reload define_insn_and_split.
	(rrcsi2): New define_insn for rrc (SImode rotate right through carry).
	(rrcsi2_carry): Likewise for rrc.f, as above but updating flags.
	(rotldi3): New define_expand that only handles rotates by 1.
	(rotldi3_cnt1): New pre-reload define_insn_and_split.
	(rotrdi3): New define_expand that only handles rotates by 1.
	(rotrdi3_cnt1): New pre-reload define_insn_and_split.
	(lshrsi3_cnt1_carry): New define_insn for lsr.f.
	(ashrsi3_cnt1_carry): New define_insn for asr.f.
	(btst_0_carry): New define_insn for asr.f without result.

gcc/testsuite/ChangeLog
	* gcc.target/arc/ashrdi3-1.c: New test case.
	* gcc.target/arc/lshrdi3-1.c: Likewise.
	* gcc.target/arc/rotldi3-1.c: Likewise.
	* gcc.target/arc/rotrdi3-1.c: Likewise.
This commit is contained in:
Roger Sayle 2023-11-13 09:16:59 +00:00
parent e9d59a2a5a
commit b51bfee1be
5 changed files with 261 additions and 0 deletions

View file

@ -136,6 +136,7 @@
UNSPEC_ARC_VMAC2HU
UNSPEC_ARC_VMPY2H
UNSPEC_ARC_VMPY2HU
UNSPEC_ARC_CC_NEZ
VUNSPEC_ARC_RTIE
VUNSPEC_ARC_SYNC
@ -2789,6 +2790,31 @@ archs4x, archs4xd"
(set_attr "type" "cc_arith")
(set_attr "length" "4,4,4,4,8,8")])
;; Add with carry that also updates the flags ("adc.f").
;; Operand 0 receives operand 1 + operand 2 + the incoming carry, where the
;; incoming carry is expressed as (ltu (reg:CC_C CC_REG) (const_int 0)).
;; In parallel, CC_REG is set in CC_C mode from a compare of the
;; zero-extended SImode sum against the DImode value
;; (incoming carry + zero-extended operand 1).
;; Operands 1 and 2 are marked commutative ("%"); all operands are registers.
(define_insn "adc_f"
[(set (reg:CC_C CC_REG)
(compare:CC_C
(zero_extend:DI
(plus:SI
(plus:SI
(ltu:SI (reg:CC_C CC_REG) (const_int 0))
(match_operand:SI 1 "register_operand" "%r"))
(match_operand:SI 2 "register_operand" "r")))
(plus:DI
(ltu:DI (reg:CC_C CC_REG) (const_int 0))
(zero_extend:DI (match_dup 1)))))
(set (match_operand:SI 0 "register_operand" "=r")
(plus:SI
(plus:SI
(ltu:SI (reg:CC_C CC_REG) (const_int 0))
(match_dup 1))
(match_dup 2)))]
""
"adc.f\\t%0,%1,%2"
[(set_attr "cond" "set")
(set_attr "predicable" "no")
(set_attr "type" "cc_arith")
(set_attr "length" "4")])
; combiner-splitter cmp / scc -> cmp / adc
(define_split
[(set (match_operand:SI 0 "dest_reg_operand" "")
@ -3529,6 +3555,68 @@ archs4x, archs4xd"
""
[(set_attr "length" "8")])
;; Expander for DImode arithmetic right shifts.
;; Only a constant shift count of exactly one is handled; for any other
;; count the expander FAILs.  The expanded pattern carries a clobber of
;; CC_REG alongside the shift.
(define_expand "ashrdi3"
[(parallel
[(set (match_operand:DI 0 "register_operand")
(ashiftrt:DI (match_operand:DI 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC CC_REG))])]
""
{
if (operands[2] != const1_rtx)
FAIL;
})
;; DImode arithmetic right shift by one bit.
;; While arc_pre_reload_split () holds, this is split into two SImode
;; instructions: "asr.f" on the high word, whose pattern sets the carry
;; flag from bit 0 of its source, then "rrc" on the low word, which shifts
;; right by one and places the carry in bit 31.
(define_insn_and_split "*ashrdi3_cnt1"
  [(set (match_operand:DI 0 "register_operand")
        (ashiftrt:DI (match_operand:DI 1 "register_operand")
                     (const_int 1)))
   (clobber (reg:CC CC_REG))]
  "arc_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* High word: arithmetic shift, carry := bit shifted out.  */
  rtx dst_hi = gen_highpart (SImode, operands[0]);
  rtx src_hi = gen_highpart (SImode, operands[1]);
  emit_insn (gen_ashrsi3_cnt1_carry (dst_hi, src_hi));

  /* Low word: shift right, bit 31 := carry.  */
  rtx dst_lo = gen_lowpart (SImode, operands[0]);
  rtx src_lo = gen_lowpart (SImode, operands[1]);
  emit_insn (gen_rrcsi2 (dst_lo, src_lo));
  DONE;
}
  [(set_attr "length" "8")])
;; Expander for DImode logical right shifts.
;; Only a constant shift count of exactly one is handled; for any other
;; count the expander FAILs.  The expanded pattern carries a clobber of
;; CC_REG alongside the shift.
(define_expand "lshrdi3"
[(parallel
[(set (match_operand:DI 0 "register_operand")
(lshiftrt:DI (match_operand:DI 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC CC_REG))])]
""
{
if (operands[2] != const1_rtx)
FAIL;
})
;; DImode logical right shift by one bit.
;; While arc_pre_reload_split () holds, this is split into two SImode
;; instructions: "lsr.f" on the high word, whose pattern sets the carry
;; flag from bit 0 of its source, then "rrc" on the low word, which shifts
;; right by one and places the carry in bit 31.
(define_insn_and_split "*lshrdi3_cnt1"
  [(set (match_operand:DI 0 "register_operand")
        (lshiftrt:DI (match_operand:DI 1 "register_operand")
                     (const_int 1)))
   (clobber (reg:CC CC_REG))]
  "arc_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  /* High word: logical shift, carry := bit shifted out.  */
  rtx dst_hi = gen_highpart (SImode, operands[0]);
  rtx src_hi = gen_highpart (SImode, operands[1]);
  emit_insn (gen_lshrsi3_cnt1_carry (dst_hi, src_hi));

  /* Low word: shift right, bit 31 := carry.  */
  rtx dst_lo = gen_lowpart (SImode, operands[0]);
  rtx src_lo = gen_lowpart (SImode, operands[1]);
  emit_insn (gen_rrcsi2 (dst_lo, src_lo));
  DONE;
}
  [(set_attr "length" "8")])
;; Rotate instructions.
(define_insn "rotrsi3_insn"
@ -3570,6 +3658,103 @@ archs4x, archs4xd"
}
})
;; Rotate through carry flag
;; "rrc": SImode rotate right through carry, without updating the flags.
;; Operand 0 receives (operand 1 >> 1) with the incoming carry, expressed
;; as (ltu (reg:CC_C CC_REG) (const_int 0)), shifted into bit 31.
(define_insn "rrcsi2"
[(set (match_operand:SI 0 "dest_reg_operand" "=r")
(plus:SI
(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
(const_int 1))
(ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
(const_int 31))))]
""
"rrc\\t%0,%1"
[(set_attr "type" "shift")
(set_attr "predicable" "no")
(set_attr "length" "4")])
;; "rrc.f": as rrcsi2, but also updating the carry flag.
;; The result in operand 0 is (operand 1 >> 1) with the incoming carry in
;; bit 31; in the same parallel, CC_REG is set in CC_C mode from bit 0 of
;; operand 1 via UNSPEC_ARC_CC_NEZ (carry set when that bit is non-zero).
(define_insn "rrcsi2_carry"
[(set (reg:CC_C CC_REG)
(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
(const_int 1))] UNSPEC_ARC_CC_NEZ))
(set (match_operand:SI 0 "dest_reg_operand" "=r")
(plus:SI
(lshiftrt:SI (match_dup 1) (const_int 1))
(ashift:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
(const_int 31))))]
""
"rrc.f\\t%0,%1"
[(set_attr "type" "shift")
(set_attr "predicable" "no")
(set_attr "length" "4")])
;; DImode Rotate instructions
;; Expander for DImode rotate left.
;; Only a constant rotate count of exactly one is handled; for any other
;; count the expander FAILs.  The expanded pattern carries a clobber of
;; CC_REG alongside the rotate.
(define_expand "rotldi3"
[(parallel
[(set (match_operand:DI 0 "register_operand")
(rotate:DI (match_operand:DI 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC CC_REG))])]
""
{
if (operands[2] != const1_rtx)
FAIL;
})
;; DImode rotate left by one bit.
;; While arc_pre_reload_split () holds, this is split into three SImode
;; instructions: add_f doubles the low word, adc_f doubles the high word
;; while adding the carry from the low word, and a final adc with a zero
;; addend adds the carry from the high word back into the low word.
(define_insn_and_split "*rotldi3_cnt1"
  [(set (match_operand:DI 0 "register_operand")
        (rotate:DI (match_operand:DI 1 "register_operand")
                   (const_int 1)))
   (clobber (reg:CC CC_REG))]
  "arc_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx dst_lo = gen_lowpart (SImode, operands[0]);
  rtx src_lo = gen_lowpart (SImode, operands[1]);
  rtx src_hi = gen_highpart (SImode, operands[1]);

  /* dst_lo = src_lo + src_lo, setting the carry flag.  */
  emit_insn (gen_add_f (dst_lo, src_lo, src_lo));

  /* dst_hi = src_hi + src_hi + carry, setting the carry flag again.  */
  rtx dst_hi = gen_highpart (SImode, operands[0]);
  emit_insn (gen_adc_f (dst_hi, src_hi, src_hi));

  /* dst_lo = dst_lo + 0 + carry.  */
  emit_insn (gen_adc (dst_lo, dst_lo, const0_rtx));
  DONE;
}
  [(set_attr "length" "12")])
;; Expander for DImode rotate right.
;; Only a constant rotate count of exactly one is handled; for any other
;; count the expander FAILs.  The expanded pattern carries a clobber of
;; CC_REG alongside the rotate.
(define_expand "rotrdi3"
[(parallel
[(set (match_operand:DI 0 "register_operand")
(rotatert:DI (match_operand:DI 1 "register_operand")
(match_operand:QI 2 "const_int_operand")))
(clobber (reg:CC CC_REG))])]
""
{
if (operands[2] != const1_rtx)
FAIL;
})
;; DImode rotate right by one bit.
;; While arc_pre_reload_split () holds, this is split into three SImode
;; instructions: btst_0_carry sets the carry flag from bit 0 of the low
;; word, rrcsi2_carry shifts the high word right with that carry entering
;; at bit 31 and sets the carry from bit 0 of the high word, and rrcsi2
;; shifts the low word right with that carry entering at bit 31.
(define_insn_and_split "*rotrdi3_cnt1"
  [(set (match_operand:DI 0 "register_operand")
        (rotatert:DI (match_operand:DI 1 "register_operand")
                     (const_int 1)))
   (clobber (reg:CC CC_REG))]
  "arc_pre_reload_split ()"
  "#"
  "&& 1"
  [(const_int 0)]
{
  rtx src_lo = gen_lowpart (SImode, operands[1]);

  /* carry := bit 0 of the low word; no register result.  */
  emit_insn (gen_btst_0_carry (src_lo));

  /* High word: shift right, bit 31 := carry, carry := old bit 0.  */
  rtx dst_hi = gen_highpart (SImode, operands[0]);
  rtx src_hi = gen_highpart (SImode, operands[1]);
  emit_insn (gen_rrcsi2_carry (dst_hi, src_hi));

  /* Low word: shift right, bit 31 := carry.  */
  rtx dst_lo = gen_lowpart (SImode, operands[0]);
  emit_insn (gen_rrcsi2 (dst_lo, src_lo));
  DONE;
}
  [(set_attr "length" "12")])
;; Compare / branch instructions.
(define_expand "cbranchsi4"
@ -6009,6 +6194,18 @@ archs4x, archs4xd"
(set_attr "iscompact" "maybe,false")
(set_attr "predicable" "no,no")])
;; "lsr.f": SImode logical right shift by one bit that also sets the carry.
;; Operand 0 receives operand 1 >> 1 (logical); in the same parallel,
;; CC_REG is set in CC_C mode from bit 0 of operand 1 via
;; UNSPEC_ARC_CC_NEZ (carry set when that bit is non-zero).
(define_insn "lshrsi3_cnt1_carry"
[(set (reg:CC_C CC_REG)
(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
(const_int 1))] UNSPEC_ARC_CC_NEZ))
(set (match_operand:SI 0 "dest_reg_operand" "=r")
(lshiftrt:SI (match_dup 1) (const_int 1)))]
""
"lsr.f\\t%0,%1"
[(set_attr "type" "unary")
(set_attr "length" "4")
(set_attr "predicable" "no")])
(define_insn "ashrsi3_cnt1"
[(set (match_operand:SI 0 "dest_reg_operand" "=q,w")
(ashiftrt:SI (match_operand:SI 1 "register_operand" "q,c")
@ -6019,6 +6216,28 @@ archs4x, archs4xd"
(set_attr "iscompact" "maybe,false")
(set_attr "predicable" "no,no")])
;; "asr.f": SImode arithmetic right shift by one bit that also sets the
;; carry.  Operand 0 receives operand 1 >> 1 (arithmetic); in the same
;; parallel, CC_REG is set in CC_C mode from bit 0 of operand 1 via
;; UNSPEC_ARC_CC_NEZ (carry set when that bit is non-zero).
(define_insn "ashrsi3_cnt1_carry"
[(set (reg:CC_C CC_REG)
(unspec:CC_C [(and:SI (match_operand:SI 1 "register_operand" "r")
(const_int 1))] UNSPEC_ARC_CC_NEZ))
(set (match_operand:SI 0 "dest_reg_operand" "=r")
(ashiftrt:SI (match_dup 1) (const_int 1)))]
""
"asr.f\\t%0,%1"
[(set_attr "type" "unary")
(set_attr "length" "4")
(set_attr "predicable" "no")])
;; Set the carry flag from bit 0 of operand 0 without producing a register
;; result.  Emitted as "asr.f" with the literal 0 as its destination, so
;; only the flag side effect of the shift is kept.  CC_REG is set in CC_C
;; mode via UNSPEC_ARC_CC_NEZ (carry set when the tested bit is non-zero).
(define_insn "btst_0_carry"
[(set (reg:CC_C CC_REG)
(unspec:CC_C [(and:SI (match_operand:SI 0 "register_operand" "r")
(const_int 1))] UNSPEC_ARC_CC_NEZ))]
""
"asr.f\\t0,%0"
[(set_attr "type" "unary")
(set_attr "length" "4")
(set_attr "predicable" "no")])
(define_peephole2
[(set (match_operand:SI 0 "register_operand" "")
(zero_extract:SI (match_dup 0)

View file

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
long long foo(long long x)
{
  /* Signed 64-bit right shift by a single bit.  */
  long long shifted = x >> 1;
  return shifted;
}
/* { dg-final { scan-assembler "asr.f\\s+r1,r1" } } */
/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */

View file

@ -0,0 +1,10 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
unsigned long long foo(unsigned long long x)
{
  /* Unsigned 64-bit right shift by a single bit.  */
  unsigned long long shifted = x >> 1;
  return shifted;
}
/* { dg-final { scan-assembler "lsr.f\\s+r1,r1" } } */
/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */

View file

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
unsigned long long foo(unsigned long long x)
{
  /* 64-bit rotate left by one: bit 63 wraps around to bit 0.  */
  unsigned long long shifted_up = x << 1;
  unsigned long long wrapped_bit = x >> 63;
  return shifted_up | wrapped_bit;
}
/* { dg-final { scan-assembler "add.f\\s+r0,r0,r0" } } */
/* { dg-final { scan-assembler "adc.f\\s+r1,r1,r1" } } */
/* { dg-final { scan-assembler "add.cs\\s+r0,r0,1" } } */

View file

@ -0,0 +1,11 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
unsigned long long foo(unsigned long long x)
{
  /* 64-bit rotate right by one: bit 0 wraps around to bit 63.  */
  unsigned long long shifted_down = x >> 1;
  unsigned long long wrapped_bit = x << 63;
  return shifted_down | wrapped_bit;
}
/* { dg-final { scan-assembler "asr.f\\s+0,r0" } } */
/* { dg-final { scan-assembler "rrc.f\\s+r1,r1" } } */
/* { dg-final { scan-assembler "rrc\\s+r0,r0" } } */