pru: Add cbranchdi4 pattern
Manually expanding into 32-bit comparisons is much more efficient than the default expansion into word-size comparisons. Note that the word size for PRU is 8 bits. PR target/106562 gcc/ChangeLog: * config/pru/pru-protos.h (pru_noteq_condition): New function declaration. * config/pru/pru.cc (pru_noteq_condition): New function. * config/pru/pru.md (cbranchdi4): Define new pattern. gcc/testsuite/ChangeLog: * gcc.target/pru/pr106562-1.c: New test. * gcc.target/pru/pr106562-2.c: New test. * gcc.target/pru/pr106562-3.c: New test. * gcc.target/pru/pr106562-4.c: New test. Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
This commit is contained in:
parent
73137f365a
commit
e95e91eccd
7 changed files with 388 additions and 0 deletions
|
@ -52,6 +52,7 @@ extern const char *pru_output_signed_cbranch (rtx *, bool);
|
|||
extern const char *pru_output_signed_cbranch_ubyteop2 (rtx *, bool);
|
||||
extern const char *pru_output_signed_cbranch_zeroop2 (rtx *, bool);
|
||||
|
||||
extern enum rtx_code pru_noteq_condition (enum rtx_code code);
|
||||
extern rtx pru_expand_fp_compare (rtx comparison, machine_mode mode);
|
||||
|
||||
extern void pru_emit_doloop (rtx *, int);
|
||||
|
|
|
@ -895,6 +895,27 @@ pru_init_libfuncs (void)
|
|||
set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
|
||||
}
|
||||
|
||||
/* Given a comparison CODE, return a similar comparison but without
|
||||
the "equals" condition. In other words, it strips GE/GEU/LE/LEU
|
||||
and instead returns GT/GTU/LT/LTU. */
|
||||
|
||||
enum rtx_code
|
||||
pru_noteq_condition (enum rtx_code code)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case GT: return GT;
|
||||
case GTU: return GTU;
|
||||
case GE: return GT;
|
||||
case GEU: return GTU;
|
||||
case LT: return LT;
|
||||
case LTU: return LTU;
|
||||
case LE: return LT;
|
||||
case LEU: return LTU;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
|
||||
/* Emit comparison instruction if necessary, returning the expression
|
||||
that holds the compare result in the proper mode. Return the comparison
|
||||
|
|
|
@ -1309,6 +1309,186 @@
|
|||
operands[2] = XEXP (t, 1);
|
||||
})
|
||||
|
||||
;; Expand the cbranchdi pattern in order to avoid the default
|
||||
;; expansion into word_mode operations, which is not efficient for PRU.
|
||||
;; In pseudocode this expansion outputs:
|
||||
;;
|
||||
;; /* EQ */
|
||||
;; if (OP1_hi {reverse_condition (cmp)} OP2_hi)
|
||||
;; goto fallthrough
|
||||
;; if (OP1_lo {cmp} OP2_lo)
|
||||
;; goto label3
|
||||
;; fallthrough:
|
||||
;;
|
||||
;; /* NE */
|
||||
;; if (OP1_hi {cmp} OP2_hi)
|
||||
;; goto label3
|
||||
;; if (OP1_lo {cmp} OP2_lo)
|
||||
;; goto label3
|
||||
;;
|
||||
;; The LT comparisons with zero take one machine instruction to simply
|
||||
;; check the sign bit. The GT comparisons with zero take two - one
|
||||
;; to check the sign bit, and one to check for zero. Hence arrange
|
||||
;; the expand such that only LT comparison is used for OP1_HI, because
|
||||
;; OP2_HI is const0_rtx.
|
||||
;;
|
||||
;; The LTU comparisons with zero will be removed by subsequent passes.
|
||||
;;
|
||||
;; /* LT/LTU/LE/LEU */
|
||||
;; if (OP1_hi {noteq_condition (cmp)} OP2_hi)
|
||||
;; goto label3 /* DI comparison obviously true. */
|
||||
;; if (OP1_hi != OP2_hi)
|
||||
;; goto fallthrough /* DI comparison obviously not true. */
|
||||
;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo)
|
||||
;; goto label3 /* Comparison was deferred to lo parts. */
|
||||
;; fallthrough:
|
||||
|
||||
;; /* GT/GTU/GE/GEU */
|
||||
;; if (OP1_hi {noteq_condition (reverse_condition (cmp))} OP2_hi)
|
||||
;; goto fallthrough /* DI comparison obviously not true. */
|
||||
;; if (OP1_hi != OP2_hi)
|
||||
;; goto label3 /* DI comparison obviously true. */
|
||||
;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo)
|
||||
;; goto label3 /* Comparison was deferred to lo parts. */
|
||||
;; fallthrough:
|
||||
|
||||
(define_expand "cbranchdi4"
  [(set (pc)
     (if_then_else
       (match_operator 0 "ordered_comparison_operator"
         [(match_operand:DI 1 "register_operand")
          (match_operand:DI 2 "reg_or_ubyte_operand")])
       (label_ref (match_operand 3 ""))
       (pc)))]
  ""
{
  /* Lower the DImode compare-and-branch by hand into a short chain of
     SImode conditional jumps.  Every supported comparison code finishes
     with DONE after emitting its own jump sequence.  */
  const enum rtx_code cmp = GET_CODE (operands[0]);
  rtx taken = operands[3];

  /* SImode halves of both operands: byte offset 0 is used as the "lo"
     part and byte offset 4 as the "hi" part.  */
  rtx lhs_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
  rtx lhs_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
  rtx rhs_lo = simplify_gen_subreg (SImode, operands[2], DImode, 0);
  rtx rhs_hi = simplify_gen_subreg (SImode, operands[2], DImode, 4);

  /* Emit "if (A <C> B) goto TARGET", record TARGET as the jump label of
     the new insn and count the additional use of TARGET.  */
  auto emit_cjump = [] (enum rtx_code c, rtx a, rtx b, rtx target)
    {
      rtx target_ref = gen_rtx_LABEL_REF (Pmode, target);
      rtx test = gen_rtx_fmt_ee (c, VOIDmode, a, b);
      rtx branch = gen_rtx_IF_THEN_ELSE (VOIDmode, test, target_ref, pc_rtx);
      rtx insn = emit_jump_insn (gen_rtx_SET (pc_rtx, branch));
      JUMP_LABEL (insn) = target;
      LABEL_NUSES (target)++;
    };

  switch (cmp)
    {
    case EQ:
      {
        /* Different hi parts rule out equality; otherwise the lo parts
           decide.  */
        rtx skip = gen_label_rtx ();

        emit_cjump (NE, lhs_hi, rhs_hi, skip);
        emit_cjump (EQ, lhs_lo, rhs_lo, taken);
        emit_label (skip);
        DONE;
      }

    case NE:
      /* A difference in either half is enough to take the branch.  */
      emit_cjump (NE, lhs_hi, rhs_hi, taken);
      emit_cjump (NE, lhs_lo, rhs_lo, taken);
      DONE;

    case LT:
    case LTU:
    case LE:
    case LEU:
      {
        /* Strictly smaller hi part: DI comparison obviously true.  */
        emit_cjump (pru_noteq_condition (cmp), lhs_hi, rhs_hi, taken);

        /* Any other hi difference: DI comparison obviously not true.  */
        rtx skip = gen_label_rtx ();
        emit_cjump (NE, lhs_hi, rhs_hi, skip);

        /* Equal hi parts: defer to an unsigned comparison of the lo
           parts.  */
        emit_cjump (unsigned_condition (cmp), lhs_lo, rhs_lo, taken);

        emit_label (skip);
        DONE;
      }

    case GT:
    case GTU:
    case GE:
    case GEU:
      {
        /* Strictly smaller hi part: DI comparison obviously not true.
           The hi test is the strict form of the reversed comparison, so
           it is always an LT/LTU check.  */
        const enum rtx_code hi_reject
          = pru_noteq_condition (reverse_condition (cmp));
        rtx skip = gen_label_rtx ();
        emit_cjump (hi_reject, lhs_hi, rhs_hi, skip);

        /* Any other hi difference: DI comparison obviously true.  */
        emit_cjump (NE, lhs_hi, rhs_hi, taken);

        /* Equal hi parts: defer to an unsigned comparison of the lo
           parts.  */
        emit_cjump (unsigned_condition (cmp), lhs_lo, rhs_lo, taken);

        emit_label (skip);
        DONE;
      }

    default:
      break;
    }

  gcc_unreachable ();
})
|
||||
|
||||
;
|
||||
; Bit test branch
|
||||
|
||||
|
|
9
gcc/testsuite/gcc.target/pru/pr106562-1.c
Normal file
9
gcc/testsuite/gcc.target/pru/pr106562-1.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options "-Os" } */
|
||||
/* { dg-final { object-size text <= 40 } } */
|
||||
|
||||
|
||||
/* Code-size probe: returns nonzero only when both 64-bit unsigned
   arguments are nonzero.  The source form is kept minimal on purpose;
   the dg-final directive of this file bounds the resulting text size.  */
char test(unsigned long long a, unsigned long long b)
|
||||
{
|
||||
return a && b;
|
||||
}
|
9
gcc/testsuite/gcc.target/pru/pr106562-2.c
Normal file
9
gcc/testsuite/gcc.target/pru/pr106562-2.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options "-Os" } */
|
||||
/* { dg-final { object-size text <= 32 } } */
|
||||
|
||||
|
||||
/* Code-size probe: signed 64-bit "greater than" against the small
   constant 10.  The dg-final directive of this file bounds the resulting
   text size.  */
char test(long long a)
|
||||
{
|
||||
return a > 10;
|
||||
}
|
9
gcc/testsuite/gcc.target/pru/pr106562-3.c
Normal file
9
gcc/testsuite/gcc.target/pru/pr106562-3.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do assemble } */
|
||||
/* { dg-options "-Os" } */
|
||||
/* { dg-final { object-size text <= 32 } } */
|
||||
|
||||
|
||||
/* Code-size probe: signed 64-bit "less than" against the small
   constant 10.  The dg-final directive of this file bounds the resulting
   text size.  */
char test(long long a)
|
||||
{
|
||||
return a < 10;
|
||||
}
|
159
gcc/testsuite/gcc.target/pru/pr106562-4.c
Normal file
159
gcc/testsuite/gcc.target/pru/pr106562-4.c
Normal file
|
@ -0,0 +1,159 @@
|
|||
/* Functional test for DI comparisons. */
|
||||
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-pedantic-errors" } */
|
||||
|
||||
/* The default test suite options use "-ansi", which
|
||||
generates spurious errors by enabling "-Wlong-long".
|
||||
Thus override the options and drop "-ansi", in order
|
||||
to freely use 64-bit (long long) types for PRU. */
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
extern void abort (void);
|
||||
|
||||
/* Signed 64-bit A < B.  Marked noinline so the comparison is compiled
   as a standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_lt (int64_t a, int64_t b)
|
||||
{
|
||||
return a < b;
|
||||
}
|
||||
|
||||
/* Unsigned 64-bit A < B.  Marked noinline so the comparison is compiled
   as a standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_ltu (uint64_t a, uint64_t b)
|
||||
{
|
||||
return a < b;
|
||||
}
|
||||
|
||||
/* Signed 64-bit A <= B.  Marked noinline so the comparison is compiled
   as a standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_le (int64_t a, int64_t b)
|
||||
{
|
||||
return a <= b;
|
||||
}
|
||||
|
||||
/* Unsigned 64-bit A <= B.  Marked noinline so the comparison is compiled
   as a standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_leu (uint64_t a, uint64_t b)
|
||||
{
|
||||
return a <= b;
|
||||
}
|
||||
|
||||
/* Signed 64-bit A > B.  Marked noinline so the comparison is compiled
   as a standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_gt (int64_t a, int64_t b)
|
||||
{
|
||||
return a > b;
|
||||
}
|
||||
|
||||
/* Unsigned 64-bit A > B.  Marked noinline so the comparison is compiled
   as a standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_gtu (uint64_t a, uint64_t b)
|
||||
{
|
||||
return a > b;
|
||||
}
|
||||
|
||||
/* Signed 64-bit A >= B.  Marked noinline so the comparison is compiled
   as a standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_ge (int64_t a, int64_t b)
|
||||
{
|
||||
return a >= b;
|
||||
}
|
||||
|
||||
/* Unsigned 64-bit A >= B.  Marked noinline so the comparison is compiled
   as a standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_geu (uint64_t a, uint64_t b)
|
||||
{
|
||||
return a >= b;
|
||||
}
|
||||
|
||||
/* 64-bit A == B.  Marked noinline so the comparison is compiled as a
   standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_eq (uint64_t a, uint64_t b)
|
||||
{
|
||||
return a == b;
|
||||
}
|
||||
|
||||
/* 64-bit A != B.  Marked noinline so the comparison is compiled as a
   standalone function rather than merged into the caller.  */
char __attribute__((noinline)) test_ne (uint64_t a, uint64_t b)
|
||||
{
|
||||
return a != b;
|
||||
}
|
||||
|
||||
/* One row of the comparison truth table: a pair of 64-bit operands
   followed by the expected result (0 or 1) of each comparison of A
   against B.  The "u" suffix marks the unsigned flavour; the plain
   names are the signed comparisons performed on the same bit patterns.
   Member order matters because the table uses positional
   initializers.  */
struct test_case
{
  uint64_t a, b;
  char lt, ltu, le, leu, gt, gtu, ge, geu, eq, ne;
};
|
||||
|
||||
/* Truth table for the 64-bit comparison helpers.  Each row holds
   operand A, operand B and then the expected results in the order
   LT, LTU, LE, LEU, GT, GTU, GE, GEU, EQ, NE.  */
const struct test_case cases[] =
{
  /* A equals B.  */
  {0x1234567800112233ULL, 0x1234567800112233ULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0},
  {0x0000000000000000ULL, 0x0000000000000000ULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0},
  {0xffffffffffffffffULL, 0xffffffffffffffffULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0},

  /* A is below B both as signed and as unsigned; both operands have the
     sign bit set.  */
  {0xffffffffffffffefULL, 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1},
  {0x8000000000000000ULL, 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1},
  {0x80000000ffffffffULL, 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1},
  {0x80000000ffffffffULL, 0xffffffff00000000ULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1},
  {0xffefffffffffffffULL, 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1},

  /* Only B has the sign bit set: A is below B as unsigned but above it
     as signed.  */
  {0x0000000000000000ULL, 0xffffffffffffffffULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1},
  {0x0000000000000001ULL, 0xffffffffffffffffULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1},
  {0x0000000000000001ULL, 0x8000000000000000ULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1},
  {0x7fffffffffffffffULL, 0x8000000000000000ULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1},

  /* Ensure lo uses unsigned comparison if hi parts are same.  */
  {0x12345678ffffffffULL, 0x1234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1},
  {0xf23456780fffffffULL, 0xf234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1},
  {0xf2345678ffffffffULL, 0xf234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1},
  {0x1234567800000002ULL, 0x1234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1},
  {0x1234567800000002ULL, 0x1234567800000003ULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1},
};
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < (sizeof (cases)/sizeof (cases[0])); i++)
|
||||
{
|
||||
const int64_t sa = (int64_t)cases[i].a;
|
||||
const int64_t sb = (int64_t)cases[i].b;
|
||||
const uint64_t ua = cases[i].a;
|
||||
const uint64_t ub = cases[i].b;
|
||||
|
||||
if (cases[i].lt != test_lt (sa, sb))
|
||||
abort ();
|
||||
if (cases[i].ltu != test_ltu (ua, ub))
|
||||
abort ();
|
||||
if (cases[i].le != test_le (sa, sb))
|
||||
abort ();
|
||||
if (cases[i].leu != test_leu (ua, ub))
|
||||
abort ();
|
||||
if (cases[i].gt != test_gt (sa, sb))
|
||||
abort ();
|
||||
if (cases[i].gtu != test_gtu (ua, ub))
|
||||
abort ();
|
||||
if (cases[i].ge != test_ge (sa, sb))
|
||||
abort ();
|
||||
if (cases[i].geu != test_geu (ua, ub))
|
||||
abort ();
|
||||
if (cases[i].eq != test_eq (ua, ub))
|
||||
abort ();
|
||||
if (cases[i].ne != test_ne (ua, ub))
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue