pru: Optimize DI shifts

If the number of shift positions is a constant, then the DI shift
operation is expanded to a sequence of 2 to 4 machine instructions.
That is more efficient than the default action of calling libgcc.

gcc/ChangeLog:

	* config/pru/pru.md (lshrdi3): New expand pattern.
	(ashldi3): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/pru/ashiftdi-1.c: New test.
	* gcc.target/pru/lshiftrtdi-1.c: New test.

Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
This commit is contained in:
Dimitar Dimitrov 2022-09-22 23:08:43 +03:00
parent 66c48be23e
commit 73137f365a
3 changed files with 302 additions and 0 deletions

View file

@ -703,6 +703,202 @@
[(set_attr "type" "alu")
(set_attr "length" "12")])
; 64-bit LSHIFTRT with a constant shift count can be expanded into
; more efficient code sequence than a variable register shift.
;
; 1. For shift >= 32:
;    dst_lo = (src_hi >> (shift - 32))
;    dst_hi = 0
;
; 2. For shift==1 there is no need for a temporary:
;    dst_lo = (src_lo >> 1)
;    if (src_hi & 1)
;      dst_lo |= (1 << 31)
;    dst_hi = (src_hi >> 1)
;
; 3. For shift < 32:
;    dst_lo = (src_lo >> shift)
;    tmp = (src_hi << (32 - shift))
;    dst_lo |= tmp
;    dst_hi = (src_hi >> shift)
;
; 4. For shift in a register:
;    Fall back to calling libgcc.
;
; 5. For a constant shift outside [1, 63]:
;    FAIL, i.e. leave the shift to the generic expansion code.
(define_expand "lshrdi3"
  [(set (match_operand:DI 0 "register_operand")
        (lshiftrt:DI
          (match_operand:DI 1 "register_operand")
          (match_operand:QI 2 "const_int_operand")))]
  ""
{
  gcc_assert (CONST_INT_P (operands[2]));

  const int nshifts = INTVAL (operands[2]);

  /* Only counts in [1, 63] can be split into valid SImode shifts below:
     a count of 0 would require "src_hi << 32", and a negative count or a
     count of 64 and above would require an SImode shift by a count
     outside 0..31.  Decline those instead of emitting such a shift.  */
  if (nshifts < 1 || nshifts > 63)
    FAIL;

  /* Split both DImode operands into their SImode halves.  Byte offset 0
     is used as the low word and byte offset 4 as the high word.  */
  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);

  if (nshifts >= 32)
    {
      /* Case 1: the low source word is shifted out entirely.  SRC_HI is
         read and DST_LO written by the same insn, so overlap is fine.  */
      emit_insn (gen_rtx_SET (dst_lo,
                              gen_rtx_LSHIFTRT (SImode,
                                                src_hi,
                                                GEN_INT (nshifts - 32))));
      emit_insn (gen_rtx_SET (dst_hi, const0_rtx));
      DONE;
    }

  gcc_assert (can_create_pseudo_p ());

  /* The expansions which follow are safe only if DST_LO and SRC_HI
     do not overlap.  If they do, then fix by using a temporary register.
     Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI
     is set, SRC_LO is no longer live.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      rtx new_src_hi = gen_reg_rtx (SImode);

      emit_move_insn (new_src_hi, src_hi);
      src_hi = new_src_hi;
    }

  if (nshifts == 1)
    {
      /* Case 2: carry bit 0 of SRC_HI into bit 31 of DST_LO with a
         conditional branch instead of a temporary register.  */
      rtx_code_label *skip_hiset_label;
      rtx j;

      emit_insn (gen_rtx_SET (dst_lo,
                              gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx)));

      /* The code generated by `genemit' would create a LABEL_REF.  */
      skip_hiset_label = gen_label_rtx ();
      /* Jump over the OR when bit 0 of SRC_HI is clear.  */
      j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
                                                  SImode,
                                                  src_hi,
                                                  GEN_INT (0),
                                                  skip_hiset_label));
      JUMP_LABEL (j) = skip_hiset_label;
      LABEL_NUSES (skip_hiset_label)++;

      /* Build the 0x80000000 mask with gen_int_mode so that the constant
         is canonical for SImode without relying on a signed left shift
         into the sign bit of a host `int'.  */
      emit_insn (gen_iorsi3 (dst_lo, dst_lo,
                             gen_int_mode (HOST_WIDE_INT_1U << 31, SImode)));
      emit_label (skip_hiset_label);
      emit_insn (gen_rtx_SET (dst_hi,
                              gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx)));
      DONE;
    }

  if (nshifts < 32)
    {
      /* Case 3: the bits leaving SRC_HI are collected in a temporary and
         merged into the top of DST_LO.  */
      rtx tmpval = gen_reg_rtx (SImode);

      emit_insn (gen_rtx_SET (dst_lo,
                              gen_rtx_LSHIFTRT (SImode,
                                                src_lo,
                                                GEN_INT (nshifts))));
      emit_insn (gen_rtx_SET (tmpval,
                              gen_rtx_ASHIFT (SImode,
                                              src_hi,
                                              GEN_INT (32 - nshifts))));
      emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval));
      emit_insn (gen_rtx_SET (dst_hi,
                              gen_rtx_LSHIFTRT (SImode,
                                                src_hi,
                                                GEN_INT (nshifts))));
      DONE;
    }

  gcc_unreachable ();
})
; 64-bit ASHIFT with a constant shift count can be expanded into
; more efficient code sequence than the libgcc call required by
; a variable shift in a register.
;
; 1. For shift >= 32:
;    dst_hi = (src_lo << (shift - 32))
;    dst_lo = 0
;
; 2. For shift==1 there is no need for a temporary:
;    dst_hi = (src_hi << 1)
;    if (src_lo & (1 << 31))
;      dst_hi |= 1
;    dst_lo = (src_lo << 1)
;
; 3. For shift < 32:
;    dst_hi = (src_hi << shift)
;    tmp = (src_lo >> (32 - shift))
;    dst_hi |= tmp
;    dst_lo = (src_lo << shift)
;
; 4. For a constant shift outside [1, 63]:
;    FAIL, i.e. leave the shift to the generic expansion code.
(define_expand "ashldi3"
  [(set (match_operand:DI 0 "register_operand")
        (ashift:DI
          (match_operand:DI 1 "register_operand")
          (match_operand:QI 2 "const_int_operand")))]
  ""
{
  gcc_assert (CONST_INT_P (operands[2]));

  const int nshifts = INTVAL (operands[2]);

  /* Only counts in [1, 63] can be split into valid SImode shifts below:
     a count of 0 would require "src_lo >> 32", and a negative count or a
     count of 64 and above would require an SImode shift by a count
     outside 0..31.  Decline those instead of emitting such a shift.  */
  if (nshifts < 1 || nshifts > 63)
    FAIL;

  /* Split both DImode operands into their SImode halves.  Byte offset 0
     is used as the low word and byte offset 4 as the high word.  */
  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);

  if (nshifts >= 32)
    {
      /* Case 1: the high source word is shifted out entirely.  SRC_LO is
         read and DST_HI written by the same insn, so overlap is fine.  */
      emit_insn (gen_rtx_SET (dst_hi,
                              gen_rtx_ASHIFT (SImode,
                                              src_lo,
                                              GEN_INT (nshifts - 32))));
      emit_insn (gen_rtx_SET (dst_lo, const0_rtx));
      DONE;
    }

  gcc_assert (can_create_pseudo_p ());

  /* The expansions which follow are safe only if DST_HI and SRC_LO
     do not overlap.  If they do, then fix by using a temporary register.
     Overlapping of DST_LO and SRC_HI is safe because by the time DST_LO
     is set, SRC_HI is no longer live.  */
  if (reg_overlap_mentioned_p (dst_hi, src_lo))
    {
      rtx new_src_lo = gen_reg_rtx (SImode);

      emit_move_insn (new_src_lo, src_lo);
      src_lo = new_src_lo;
    }

  if (nshifts == 1)
    {
      /* Case 2: carry bit 31 of SRC_LO into bit 0 of DST_HI with a
         conditional branch instead of a temporary register.  */
      rtx_code_label *skip_hiset_label;
      rtx j;

      emit_insn (gen_rtx_SET (dst_hi,
                              gen_rtx_ASHIFT (SImode, src_hi, const1_rtx)));

      /* The label is created by hand, as in the lshrdi3 expansion,
         rather than through a LABEL_REF in the RTL template.  */
      skip_hiset_label = gen_label_rtx ();
      /* Jump over the OR when bit 31 of SRC_LO is clear.  */
      j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
                                                  SImode,
                                                  src_lo,
                                                  GEN_INT (31),
                                                  skip_hiset_label));
      JUMP_LABEL (j) = skip_hiset_label;
      LABEL_NUSES (skip_hiset_label)++;

      emit_insn (gen_iorsi3 (dst_hi, dst_hi, const1_rtx));
      emit_label (skip_hiset_label);
      emit_insn (gen_rtx_SET (dst_lo,
                              gen_rtx_ASHIFT (SImode, src_lo, const1_rtx)));
      DONE;
    }

  if (nshifts < 32)
    {
      /* Case 3: the bits leaving SRC_LO are collected in a temporary and
         merged into the bottom of DST_HI.  */
      rtx tmpval = gen_reg_rtx (SImode);

      emit_insn (gen_rtx_SET (dst_hi,
                              gen_rtx_ASHIFT (SImode,
                                              src_hi,
                                              GEN_INT (nshifts))));
      emit_insn (gen_rtx_SET (tmpval,
                              gen_rtx_LSHIFTRT (SImode,
                                                src_lo,
                                                GEN_INT (32 - nshifts))));
      emit_insn (gen_iorsi3 (dst_hi, dst_hi, tmpval));
      emit_insn (gen_rtx_SET (dst_lo,
                              gen_rtx_ASHIFT (SImode,
                                              src_lo,
                                              GEN_INT (nshifts))));
      DONE;
    }

  gcc_unreachable ();
})
;; Include ALU patterns with zero-extension of operands. That's where
;; the real insns are defined.

View file

@ -0,0 +1,53 @@
/* Functional test for DI left shift. */
/* { dg-do run } */
/* { dg-options "-pedantic-errors" } */
#include <stddef.h>
#include <stdint.h>
extern void abort (void);
/* Return A shifted left by 1 bit.  Marked noinline so the call is not
   inlined into its caller.  */
uint64_t __attribute__((noinline)) ashift_1 (uint64_t a)
{
  a <<= 1;
  return a;
}
/* Return A shifted left by 10 bits.  Marked noinline so the call is not
   inlined into its caller.  */
uint64_t __attribute__((noinline)) ashift_10 (uint64_t a)
{
  a <<= 10;
  return a;
}
/* Return A shifted left by 32 bits, i.e. by exactly one 32-bit word.
   Marked noinline so the call is not inlined into its caller.  */
uint64_t __attribute__((noinline)) ashift_32 (uint64_t a)
{
  a <<= 32;
  return a;
}
/* Return A shifted left by 36 bits, i.e. by more than one 32-bit word.
   Marked noinline so the call is not inlined into its caller.  */
uint64_t __attribute__((noinline)) ashift_36 (uint64_t a)
{
  a <<= 36;
  return a;
}
/* Abort the test unless ACTUAL equals EXPECTED.  */
static void
expect_eq (uint64_t actual, uint64_t expected)
{
  if (actual != expected)
    abort ();
}

/* Run every left-shift helper on two bit patterns and compare each
   result against its precomputed value.  Returns 0 when all match;
   any mismatch aborts.  */
int
main (int argc, char** argv)
{
  const uint64_t alternating = 0xaaaa5555aaaa5555ull;
  const uint64_t mixed = 0x1234567822334455ull;

  expect_eq (ashift_1 (alternating), 0x5554aaab5554aaaaull);
  expect_eq (ashift_10 (alternating), 0xa95556aaa9555400ull);
  expect_eq (ashift_32 (alternating), 0xaaaa555500000000ull);
  expect_eq (ashift_36 (alternating), 0xaaa5555000000000ull);

  expect_eq (ashift_1 (mixed), 0x2468acf0446688aaull);
  expect_eq (ashift_10 (mixed), 0xd159e088cd115400ull);
  expect_eq (ashift_32 (mixed), 0x2233445500000000ull);
  expect_eq (ashift_36 (mixed), 0x2334455000000000ull);

  return 0;
}

View file

@ -0,0 +1,53 @@
/* Functional test for DI right shift. */
/* { dg-do run } */
/* { dg-options "-pedantic-errors" } */
#include <stddef.h>
#include <stdint.h>
extern void abort (void);
/* Return A logically shifted right by 1 bit.  Marked noinline so the
   call is not inlined into its caller.  */
uint64_t __attribute__((noinline)) lshift_1 (uint64_t a)
{
  a >>= 1;
  return a;
}
/* Return A logically shifted right by 10 bits.  Marked noinline so the
   call is not inlined into its caller.  */
uint64_t __attribute__((noinline)) lshift_10 (uint64_t a)
{
  a >>= 10;
  return a;
}
/* Return A logically shifted right by 32 bits, i.e. by exactly one
   32-bit word.  Marked noinline so the call is not inlined into its
   caller.  */
uint64_t __attribute__((noinline)) lshift_32 (uint64_t a)
{
  a >>= 32;
  return a;
}
/* Return A logically shifted right by 36 bits, i.e. by more than one
   32-bit word.  Marked noinline so the call is not inlined into its
   caller.  */
uint64_t __attribute__((noinline)) lshift_36 (uint64_t a)
{
  a >>= 36;
  return a;
}
/* Abort the test unless ACTUAL equals EXPECTED.  */
static void
expect_eq (uint64_t actual, uint64_t expected)
{
  if (actual != expected)
    abort ();
}

/* Run every right-shift helper on two bit patterns and compare each
   result against its precomputed value.  Returns 0 when all match;
   any mismatch aborts.  */
int
main (int argc, char** argv)
{
  const uint64_t alternating = 0xaaaa5555aaaa5555ull;
  const uint64_t mixed = 0x1234567822334455ull;

  expect_eq (lshift_1 (alternating), 0x55552aaad5552aaaull);
  expect_eq (lshift_10 (alternating), 0x002aaa95556aaa95ull);
  expect_eq (lshift_32 (alternating), 0x00000000aaaa5555ull);
  expect_eq (lshift_36 (alternating), 0x000000000aaaa555ull);

  expect_eq (lshift_1 (mixed), 0x091a2b3c1119a22aull);
  expect_eq (lshift_10 (mixed), 0x00048d159e088cd1ull);
  expect_eq (lshift_32 (mixed), 0x0000000012345678ull);
  expect_eq (lshift_36 (mixed), 0x0000000001234567ull);

  return 0;
}