pru: Optimize DI shifts
If the number of shift positions is a constant, then the DI shift operation is expanded to a sequence of 2 to 4 machine instructions. That is more efficient than the default action to call libgcc. gcc/ChangeLog: * config/pru/pru.md (lshrdi3): New expand pattern. (ashldi3): Ditto. gcc/testsuite/ChangeLog: * gcc.target/pru/ashiftdi-1.c: New test. * gcc.target/pru/lshiftrtdi-1.c: New test. Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
This commit is contained in:
parent
66c48be23e
commit
73137f365a
3 changed files with 302 additions and 0 deletions
|
@ -703,6 +703,202 @@
|
|||
[(set_attr "type" "alu")
|
||||
(set_attr "length" "12")])
|
||||
|
||||
|
||||
; 64-bit LSHIFTRT with a constant shift count can be expanded into
|
||||
; more efficient code sequence than a variable register shift.
|
||||
;
|
||||
; 1. For shift >= 32:
|
||||
; dst_lo = (src_hi >> (shift - 32))
|
||||
; dst_hi = 0
|
||||
;
|
||||
; 2. For shift==1 there is no need for a temporary:
|
||||
; dst_lo = (src_lo >> 1)
|
||||
; if (src_hi & 1)
|
||||
; dst_lo |= (1 << 31)
|
||||
; dst_hi = (src_hi >> 1)
|
||||
;
|
||||
; 3. For shift < 32:
|
||||
; dst_lo = (src_lo >> shift)
|
||||
; tmp = (src_hi << (32 - shift))
|
||||
; dst_lo |= tmp
|
||||
; dst_hi = (src_hi >> shift)
|
||||
;
|
||||
; 4. For shift in a register:
|
||||
; Fall back to calling libgcc.
|
||||
(define_expand "lshrdi3"
  [(set (match_operand:DI 0 "register_operand")
        (lshiftrt:DI
          (match_operand:DI 1 "register_operand")
          (match_operand:QI 2 "const_int_operand")))]
  ""
{
  /* Expand a DImode logical right shift by a constant count into SImode
     operations on the low and high halves of the operands.  */
  gcc_assert (CONST_INT_P (operands[2]));

  const int nshifts = INTVAL (operands[2]);
  /* Byte offset 0 selects the low SImode half, byte offset 4 the high
     half.  */
  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);

  if (nshifts >= 32)
    {
      /* Only the high source word contributes to the result:
           dst_lo = src_hi >> (nshifts - 32)
           dst_hi = 0  */
      emit_insn (gen_rtx_SET (dst_lo,
                              gen_rtx_LSHIFTRT (SImode,
                                                src_hi,
                                                GEN_INT (nshifts - 32))));
      emit_insn (gen_rtx_SET (dst_hi, const0_rtx));
      DONE;
    }

  /* The remaining expansions may need fresh pseudo registers.  */
  gcc_assert (can_create_pseudo_p ());

  /* The expansions which follow are safe only if DST_LO and SRC_HI
     do not overlap.  If they do, then fix by using a temporary register.
     Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI
     is set, SRC_LO is no longer live.  */
  if (reg_overlap_mentioned_p (dst_lo, src_hi))
    {
      rtx new_src_hi = gen_reg_rtx (SImode);

      emit_move_insn (new_src_hi, src_hi);
      src_hi = new_src_hi;
    }

  if (nshifts == 1)
    {
      rtx_code_label *skip_hiset_label;
      rtx j;

      emit_insn (gen_rtx_SET (dst_lo,
                              gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx)));

      /* The code generated by `genemit' would create a LABEL_REF.  */
      skip_hiset_label = gen_label_rtx ();
      /* Conditional branch over the OR below, keyed on bit 0 of SRC_HI.
         NOTE(review): cbranch_qbbx_const is defined outside this
         expander; presumably the EQ form jumps when the bit is clear.  */
      j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
                                                  SImode,
                                                  src_hi,
                                                  GEN_INT (0),
                                                  skip_hiset_label));
      /* Record the jump target and account for the new label use.  */
      JUMP_LABEL (j) = skip_hiset_label;
      LABEL_NUSES (skip_hiset_label)++;

      /* Set bit 31 of DST_LO.  The constant is built with gen_int_mode
         from an unsigned host value, so that no host `int' is shifted
         into its sign bit and the CONST_INT is sign-extended as SImode
         requires.  */
      emit_insn (gen_iorsi3 (dst_lo, dst_lo,
                             gen_int_mode (HOST_WIDE_INT_1U << 31, SImode)));
      emit_label (skip_hiset_label);
      emit_insn (gen_rtx_SET (dst_hi,
                              gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx)));
      DONE;
    }

  if (nshifts < 32)
    {
      /* General case:
           dst_lo = (src_lo >> nshifts) | (src_hi << (32 - nshifts))
           dst_hi = src_hi >> nshifts  */
      rtx tmpval = gen_reg_rtx (SImode);

      emit_insn (gen_rtx_SET (dst_lo,
                              gen_rtx_LSHIFTRT (SImode,
                                                src_lo,
                                                GEN_INT (nshifts))));
      emit_insn (gen_rtx_SET (tmpval,
                              gen_rtx_ASHIFT (SImode,
                                              src_hi,
                                              GEN_INT (32 - nshifts))));
      emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval));
      emit_insn (gen_rtx_SET (dst_hi,
                              gen_rtx_LSHIFTRT (SImode,
                                                src_hi,
                                                GEN_INT (nshifts))));
      DONE;
    }
  gcc_unreachable ();
})
|
||||
|
||||
; 64-bit ASHIFT with a constant shift count can be expanded into
|
||||
; more efficient code sequence than the libgcc call required by
|
||||
; a variable shift in a register.
|
||||
|
||||
(define_expand "ashldi3"
  [(set (match_operand:DI 0 "register_operand")
        (ashift:DI
          (match_operand:DI 1 "register_operand")
          (match_operand:QI 2 "const_int_operand")))]
  ""
{
  /* Expand a DImode left shift by a constant count into SImode
     operations on the low and high halves of the operands.  */
  gcc_assert (CONST_INT_P (operands[2]));

  const int count = INTVAL (operands[2]);
  /* Byte offset 0 selects the low SImode half, byte offset 4 the high
     half.  */
  rtx out_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
  rtx out_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
  rtx in_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
  rtx in_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);

  if (count >= 32)
    {
      /* Only the low source word survives:
           out_hi = in_lo << (count - 32)
           out_lo = 0  */
      rtx word_shift = GEN_INT (count - 32);

      emit_insn (gen_rtx_SET (out_hi,
                              gen_rtx_ASHIFT (SImode, in_lo, word_shift)));
      emit_insn (gen_rtx_SET (out_lo, const0_rtx));
      DONE;
    }

  /* Everything below may allocate new pseudo registers.  */
  gcc_assert (can_create_pseudo_p ());

  /* OUT_HI is written before IN_LO is read for the last time, so the two
     must not share a register; copy IN_LO to a temporary if they do.
     OUT_LO may overlap IN_HI, since IN_HI is dead by the time OUT_LO is
     written.  */
  if (reg_overlap_mentioned_p (out_hi, in_lo))
    {
      rtx saved_lo = gen_reg_rtx (SImode);

      emit_move_insn (saved_lo, in_lo);
      in_lo = saved_lo;
    }

  if (count == 1)
    {
      emit_insn (gen_rtx_SET (out_hi,
                              gen_rtx_ASHIFT (SImode, in_hi, const1_rtx)));

      /* Conditional branch over the OR below, keyed on bit 31 of IN_LO.
         NOTE(review): cbranch_qbbx_const is defined outside this
         expander; presumably the EQ form jumps when the bit is clear.  */
      rtx_code_label *no_carry_label = gen_label_rtx ();
      rtx branch = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
                                                           SImode,
                                                           in_lo,
                                                           GEN_INT (31),
                                                           no_carry_label));
      /* Record the jump target and account for the new label use.  */
      JUMP_LABEL (branch) = no_carry_label;
      LABEL_NUSES (no_carry_label)++;

      /* Set bit 0 of OUT_HI.  */
      emit_insn (gen_iorsi3 (out_hi, out_hi, const1_rtx));
      emit_label (no_carry_label);
      emit_insn (gen_rtx_SET (out_lo,
                              gen_rtx_ASHIFT (SImode, in_lo, const1_rtx)));
      DONE;
    }

  /* Counts of 32 and above were handled by the first branch.  */
  if (!(count < 32))
    gcc_unreachable ();

  /* General case:
       out_hi = (in_hi << count) | (in_lo >> (32 - count))
       out_lo = in_lo << count  */
  rtx carry_bits = gen_reg_rtx (SImode);

  emit_insn (gen_rtx_SET (out_hi,
                          gen_rtx_ASHIFT (SImode, in_hi, GEN_INT (count))));
  emit_insn (gen_rtx_SET (carry_bits,
                          gen_rtx_LSHIFTRT (SImode,
                                            in_lo,
                                            GEN_INT (32 - count))));
  emit_insn (gen_iorsi3 (out_hi, out_hi, carry_bits));
  emit_insn (gen_rtx_SET (out_lo,
                          gen_rtx_ASHIFT (SImode, in_lo, GEN_INT (count))));
  DONE;
})
|
||||
|
||||
;; Include ALU patterns with zero-extension of operands. That's where
|
||||
;; the real insns are defined.
|
||||
|
|
53
gcc/testsuite/gcc.target/pru/ashiftdi-1.c
Normal file
53
gcc/testsuite/gcc.target/pru/ashiftdi-1.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* Functional test for DI left shift. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-pedantic-errors" } */
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
/* Return A shifted left by 1 bit.  Marked noinline.  */
uint64_t __attribute__((noinline))
ashift_1 (uint64_t a)
{
  const uint64_t shifted = a << 1;

  return shifted;
}
|
||||
|
||||
/* Return A shifted left by 10 bits.  Marked noinline.  */
uint64_t __attribute__((noinline))
ashift_10 (uint64_t a)
{
  const uint64_t shifted = a << 10;

  return shifted;
}
|
||||
|
||||
/* Return A shifted left by 32 bits.  Marked noinline.  */
uint64_t __attribute__((noinline))
ashift_32 (uint64_t a)
{
  const uint64_t shifted = a << 32;

  return shifted;
}
|
||||
|
||||
/* Return A shifted left by 36 bits.  Marked noinline.  */
uint64_t __attribute__((noinline))
ashift_36 (uint64_t a)
{
  const uint64_t shifted = a << 36;

  return shifted;
}
|
||||
|
||||
/* Abort the test unless ACTUAL equals EXPECTED.  */
static void
expect_eq (uint64_t actual, uint64_t expected)
{
  if (actual != expected)
    abort ();
}

/* Compare each constant left shift of two fixed 64-bit inputs against
   its precomputed result; abort on the first mismatch.  */
int
main (int argc, char** argv)
{
  expect_eq (ashift_1 (0xaaaa5555aaaa5555ull), 0x5554aaab5554aaaaull);
  expect_eq (ashift_10 (0xaaaa5555aaaa5555ull), 0xa95556aaa9555400ull);
  expect_eq (ashift_32 (0xaaaa5555aaaa5555ull), 0xaaaa555500000000ull);
  expect_eq (ashift_36 (0xaaaa5555aaaa5555ull), 0xaaa5555000000000ull);

  expect_eq (ashift_1 (0x1234567822334455ull), 0x2468acf0446688aaull);
  expect_eq (ashift_10 (0x1234567822334455ull), 0xd159e088cd115400ull);
  expect_eq (ashift_32 (0x1234567822334455ull), 0x2233445500000000ull);
  expect_eq (ashift_36 (0x1234567822334455ull), 0x2334455000000000ull);

  return 0;
}
|
53
gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
Normal file
53
gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
Normal file
|
@ -0,0 +1,53 @@
|
|||
/* Functional test for DI right shift. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-pedantic-errors" } */
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
/* Return A shifted right (logically) by 1 bit.  Marked noinline.  */
uint64_t __attribute__((noinline))
lshift_1 (uint64_t a)
{
  const uint64_t shifted = a >> 1;

  return shifted;
}
|
||||
|
||||
/* Return A shifted right (logically) by 10 bits.  Marked noinline.  */
uint64_t __attribute__((noinline))
lshift_10 (uint64_t a)
{
  const uint64_t shifted = a >> 10;

  return shifted;
}
|
||||
|
||||
/* Return A shifted right (logically) by 32 bits.  Marked noinline.  */
uint64_t __attribute__((noinline))
lshift_32 (uint64_t a)
{
  const uint64_t shifted = a >> 32;

  return shifted;
}
|
||||
|
||||
/* Return A shifted right (logically) by 36 bits.  Marked noinline.  */
uint64_t __attribute__((noinline))
lshift_36 (uint64_t a)
{
  const uint64_t shifted = a >> 36;

  return shifted;
}
|
||||
|
||||
/* Abort the test unless ACTUAL equals EXPECTED.  */
static void
expect_eq (uint64_t actual, uint64_t expected)
{
  if (actual != expected)
    abort ();
}

/* Compare each constant logical right shift of two fixed 64-bit inputs
   against its precomputed result; abort on the first mismatch.  */
int
main (int argc, char** argv)
{
  expect_eq (lshift_1 (0xaaaa5555aaaa5555ull), 0x55552aaad5552aaaull);
  expect_eq (lshift_10 (0xaaaa5555aaaa5555ull), 0x002aaa95556aaa95ull);
  expect_eq (lshift_32 (0xaaaa5555aaaa5555ull), 0x00000000aaaa5555ull);
  expect_eq (lshift_36 (0xaaaa5555aaaa5555ull), 0x000000000aaaa555ull);

  expect_eq (lshift_1 (0x1234567822334455ull), 0x091a2b3c1119a22aull);
  expect_eq (lshift_10 (0x1234567822334455ull), 0x00048d159e088cd1ull);
  expect_eq (lshift_32 (0x1234567822334455ull), 0x0000000012345678ull);
  expect_eq (lshift_36 (0x1234567822334455ull), 0x0000000001234567ull);

  return 0;
}
|
Loading…
Add table
Reference in a new issue