pru: Add cbranchdi4 pattern

Manually expanding into 32-bit comparisons is much more efficient than
the default expansion into word-size comparisons.  Note that the word
size for PRU is 8 bits.

	PR target/106562

gcc/ChangeLog:

	* config/pru/pru-protos.h (pru_noteq_condition): New
	function declaration.
	* config/pru/pru.cc (pru_noteq_condition): New function.
	* config/pru/pru.md (cbranchdi4): Define new pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/pru/pr106562-1.c: New test.
	* gcc.target/pru/pr106562-2.c: New test.
	* gcc.target/pru/pr106562-3.c: New test.
	* gcc.target/pru/pr106562-4.c: New test.

Signed-off-by: Dimitar Dimitrov <dimitar@dinux.eu>
This commit is contained in:
Dimitar Dimitrov 2022-09-18 16:27:18 +03:00
parent 73137f365a
commit e95e91eccd
7 changed files with 388 additions and 0 deletions

View file

@ -52,6 +52,7 @@ extern const char *pru_output_signed_cbranch (rtx *, bool);
extern const char *pru_output_signed_cbranch_ubyteop2 (rtx *, bool);
extern const char *pru_output_signed_cbranch_zeroop2 (rtx *, bool);
/* Return the strict form (GT/GTU/LT/LTU) of an ordering comparison code.  */
extern enum rtx_code pru_noteq_condition (enum rtx_code code);
extern rtx pru_expand_fp_compare (rtx comparison, machine_mode mode);
extern void pru_emit_doloop (rtx *, int);

View file

@ -895,6 +895,27 @@ pru_init_libfuncs (void)
set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull");
}
/* Map an ordering comparison CODE to its strict counterpart: GE, GEU,
   LE and LEU become GT, GTU, LT and LTU respectively, while codes that
   are already strict are returned unchanged.  Any other code is not
   handled and ends up in gcc_unreachable.  */

enum rtx_code
pru_noteq_condition (enum rtx_code code)
{
  switch (code)
    {
    case GT:
    case GE:
      return GT;

    case GTU:
    case GEU:
      return GTU;

    case LT:
    case LE:
      return LT;

    case LTU:
    case LEU:
      return LTU;

    default:
      gcc_unreachable ();
    }
}
/* Emit comparison instruction if necessary, returning the expression
that holds the compare result in the proper mode. Return the comparison

View file

@ -1309,6 +1309,186 @@
operands[2] = XEXP (t, 1);
})
;; Expand the cbranchdi pattern in order to avoid the default
;; expansion into word_mode operations, which is not efficient for PRU.
;; In pseudocode this expansion outputs:
;;
;; /* EQ */
;; if (OP1_hi {reverse_condition (cmp)} OP2_hi)
;; goto fallthrough
;; if (OP1_lo {cmp} OP2_lo)
;; goto label3
;; fallthrough:
;;
;; /* NE */
;; if (OP1_hi {cmp} OP2_hi)
;; goto label3
;; if (OP1_lo {cmp} OP2_lo)
;; goto label3
;;
;; The LT comparisons with zero take one machine instruction to simply
;; check the sign bit. The GT comparisons with zero take two - one
;; to check the sign bit, and one to check for zero. Hence arrange
;; the expand such that only LT comparison is used for OP1_HI, because
;; OP2_HI is const0_rtx.
;;
;; The LTU comparisons with zero will be removed by subsequent passes.
;;
;; /* LT/LTU/LE/LEU */
;; if (OP1_hi {noteq_condition (cmp)} OP2_hi)
;; goto label3 /* DI comparison obviously true. */
;; if (OP1_hi != OP2_hi)
;; goto fallthrough /* DI comparison obviously not true. */
;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo)
;; goto label3 /* Comparison was deferred to lo parts. */
;; fallthrough:
;;
;; /* GT/GTU/GE/GEU */
;; if (OP1_hi {noteq_condition (reverse_condition (cmp))} OP2_hi)
;; goto fallthrough /* DI comparison obviously not true. */
;; if (OP1_hi != OP2_hi)
;; goto label3 /* DI comparison obviously true. */
;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo)
;; goto label3 /* Comparison was deferred to lo parts. */
;; fallthrough:
(define_expand "cbranchdi4"
  [(set (pc)
     (if_then_else
       (match_operator 0 "ordered_comparison_operator"
         [(match_operand:DI 1 "register_operand")
          (match_operand:DI 2 "reg_or_ubyte_operand")])
       (label_ref (match_operand 3 ""))
       (pc)))]
  ""
{
  const enum rtx_code code = GET_CODE (operands[0]);
  rtx taken = operands[3];

  /* Split both DI operands into their SI halves (byte offsets 0 and 4).  */
  rtx lhs_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
  rtx lhs_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
  rtx rhs_lo = simplify_gen_subreg (SImode, operands[2], DImode, 0);
  rtx rhs_hi = simplify_gen_subreg (SImode, operands[2], DImode, 4);

  /* Emit "if (A CMP B) goto TARGET" as a conditional jump insn and
     record the additional use of TARGET.  */
  auto emit_cbranch = [] (enum rtx_code cmp, rtx a, rtx b, rtx target)
    {
      rtx target_ref = gen_rtx_LABEL_REF (Pmode, target);
      rtx cond = gen_rtx_fmt_ee (cmp, VOIDmode, a, b);
      rtx branch = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target_ref, pc_rtx);
      rtx jump = emit_jump_insn (gen_rtx_SET (pc_rtx, branch));
      JUMP_LABEL (jump) = target;
      LABEL_NUSES (target)++;
    };

  switch (code)
    {
    case EQ:
      {
        /* Differing hi parts skip the lo check; otherwise the lo parts
           decide.  */
        rtx skip = gen_label_rtx ();
        emit_cbranch (NE, lhs_hi, rhs_hi, skip);
        emit_cbranch (EQ, lhs_lo, rhs_lo, taken);
        emit_label (skip);
        DONE;
      }

    case NE:
      /* A difference in either half takes the branch.  */
      emit_cbranch (NE, lhs_hi, rhs_hi, taken);
      emit_cbranch (NE, lhs_lo, rhs_lo, taken);
      DONE;

    case LT:
    case LTU:
    case LE:
      case LEU:
      {
        /* Hi parts already satisfy the strict comparison: the DI
           comparison is obviously true.  */
        emit_cbranch (pru_noteq_condition (code), lhs_hi, rhs_hi, taken);

        /* Hi parts differ the other way: obviously not true.  */
        rtx skip = gen_label_rtx ();
        emit_cbranch (NE, lhs_hi, rhs_hi, skip);

        /* Hi parts are equal: defer to an unsigned comparison of the
           lo parts.  */
        emit_cbranch (unsigned_condition (code), lhs_lo, rhs_lo, taken);
        emit_label (skip);
        DONE;
      }

    case GT:
    case GTU:
    case GE:
    case GEU:
      {
        /* Hi parts satisfy the strict form of the reversed comparison:
           the DI comparison is obviously not true.  */
        const enum rtx_code reversed = reverse_condition (code);
        rtx skip = gen_label_rtx ();
        emit_cbranch (pru_noteq_condition (reversed), lhs_hi, rhs_hi, skip);

        /* Hi parts differ the other way: obviously true.  */
        emit_cbranch (NE, lhs_hi, rhs_hi, taken);

        /* Hi parts are equal: defer to an unsigned comparison of the
           lo parts.  */
        emit_cbranch (unsigned_condition (code), lhs_lo, rhs_lo, taken);
        emit_label (skip);
        DONE;
      }

    default:
      gcc_unreachable ();
    }
})
;
; Bit test branch

View file

@ -0,0 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-Os" } */
/* { dg-final { object-size text <= 40 } } */
/* Size check: both 64-bit unsigned operands are tested for being non-zero,
   and the assembled text section has to stay within the limit given in the
   final directive above.  */
char test(unsigned long long a, unsigned long long b)
{
return a && b;
}

View file

@ -0,0 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-Os" } */
/* { dg-final { object-size text <= 32 } } */
/* Size check: signed 64-bit "greater than" against a small constant.  The
   assembled text section has to stay within the limit given in the final
   directive above.  */
char test(long long a)
{
return a > 10;
}

View file

@ -0,0 +1,9 @@
/* { dg-do assemble } */
/* { dg-options "-Os" } */
/* { dg-final { object-size text <= 32 } } */
/* Size check: signed 64-bit "less than" against a small constant.  The
   assembled text section has to stay within the limit given in the final
   directive above.  */
char test(long long a)
{
return a < 10;
}

View file

@ -0,0 +1,159 @@
/* Functional test for DI comparisons. */
/* { dg-do run } */
/* { dg-options "-pedantic-errors" } */
/* The default test suite options use "-ansi", which
generates spurious errors by enabling "-Wlong-long".
Thus override the options and drop "-ansi", in order
to freely use 64-bit (long long) types for PRU. */
#include <stddef.h>
#include <stdint.h>
extern void abort (void);
/* One helper per 64-bit comparison operator.  Signed variants take
   int64_t, unsigned variants take uint64_t; equality and inequality are
   only exercised on uint64_t.  Each helper returns the comparison result
   as a char (1 when true, 0 when false).  The helpers are marked noinline,
   presumably so that every comparison is compiled as a separate function
   instead of being folded into the caller.  */

/* Signed a < b.  */
char __attribute__((noinline)) test_lt (int64_t a, int64_t b)
{
return a < b;
}
/* Unsigned a < b.  */
char __attribute__((noinline)) test_ltu (uint64_t a, uint64_t b)
{
return a < b;
}
/* Signed a <= b.  */
char __attribute__((noinline)) test_le (int64_t a, int64_t b)
{
return a <= b;
}
/* Unsigned a <= b.  */
char __attribute__((noinline)) test_leu (uint64_t a, uint64_t b)
{
return a <= b;
}
/* Signed a > b.  */
char __attribute__((noinline)) test_gt (int64_t a, int64_t b)
{
return a > b;
}
/* Unsigned a > b.  */
char __attribute__((noinline)) test_gtu (uint64_t a, uint64_t b)
{
return a > b;
}
/* Signed a >= b.  */
char __attribute__((noinline)) test_ge (int64_t a, int64_t b)
{
return a >= b;
}
/* Unsigned a >= b.  */
char __attribute__((noinline)) test_geu (uint64_t a, uint64_t b)
{
return a >= b;
}
/* a == b.  */
char __attribute__((noinline)) test_eq (uint64_t a, uint64_t b)
{
return a == b;
}
/* a != b.  */
char __attribute__((noinline)) test_ne (uint64_t a, uint64_t b)
{
return a != b;
}
/* One row of the comparison table: a pair of 64-bit operands followed by
   the expected result (0 or 1) of every comparison helper.  The operands
   are stored as unsigned values; the signed helpers receive them cast to
   int64_t.  Field order matters because the table uses positional
   initializers.  */
struct test_case {
  /* Operands.  */
  uint64_t a, b;
  /* Expected "less than" / "less or equal" results, signed then unsigned.  */
  char lt, ltu, le, leu;
  /* Expected "greater than" / "greater or equal" results, signed then
     unsigned.  */
  char gt, gtu, ge, geu;
  /* Expected equality and inequality results.  */
  char eq, ne;
};
/* Table of operand pairs with the expected result of each comparison.
   Every row lists operand a, operand b, and then the expected values in
   the column order given by the comment on the first line of the
   initializer.  */
const struct test_case cases[] = {
/* LT,LTU,LE,LEU,GT,GTU,GE,GEU,EQ,NE */
/* Equal operands.  */
{ 0x1234567800112233ULL,
0x1234567800112233ULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 },
{ 0x0000000000000000ULL,
0x0000000000000000ULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 },
{ 0xffffffffffffffffULL,
0xffffffffffffffffULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 },
/* Both operands have the top bit set; a is smaller than b both as
   signed and as unsigned.  */
{ 0xffffffffffffffefULL,
0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
{ 0x8000000000000000ULL,
0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
{ 0x80000000ffffffffULL,
0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
{ 0x80000000ffffffffULL,
0xffffffff00000000ULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
{ 0xffefffffffffffffULL,
0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
/* Only b has the top bit set; a is greater as signed but smaller as
   unsigned.  */
{ 0x0000000000000000ULL,
0xffffffffffffffffULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 },
{ 0x0000000000000001ULL,
0xffffffffffffffffULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 },
{ 0x0000000000000001ULL,
0x8000000000000000ULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 },
{ 0x7fffffffffffffffULL,
0x8000000000000000ULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 },
/* Ensure lo uses unsigned comparison if hi parts are same. */
{ 0x12345678ffffffffULL,
0x1234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 },
{ 0xf23456780fffffffULL,
0xf234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 },
{ 0xf2345678ffffffffULL,
0xf234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 },
{ 0x1234567800000002ULL,
0x1234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 },
{ 0x1234567800000002ULL,
0x1234567800000003ULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
};
int
main (void)
{
size_t i;
for (i = 0; i < (sizeof (cases)/sizeof (cases[0])); i++)
{
const int64_t sa = (int64_t)cases[i].a;
const int64_t sb = (int64_t)cases[i].b;
const uint64_t ua = cases[i].a;
const uint64_t ub = cases[i].b;
if (cases[i].lt != test_lt (sa, sb))
abort ();
if (cases[i].ltu != test_ltu (ua, ub))
abort ();
if (cases[i].le != test_le (sa, sb))
abort ();
if (cases[i].leu != test_leu (ua, ub))
abort ();
if (cases[i].gt != test_gt (sa, sb))
abort ();
if (cases[i].gtu != test_gtu (ua, ub))
abort ();
if (cases[i].ge != test_ge (sa, sb))
abort ();
if (cases[i].geu != test_geu (ua, ub))
abort ();
if (cases[i].eq != test_eq (ua, ub))
abort ();
if (cases[i].ne != test_ne (ua, ub))
abort ();
}
return 0;
}