aarch64: Emit single-instruction for smin (x, 0) and smax (x, 0)
Motivated by https://reviews.llvm.org/D148249, we can expand to a single instruction for the SMIN (x, 0) and SMAX (x, 0) cases using the combined AND/BIC and ASR operations. (An arithmetic shift right by the register width minus one yields all-ones when x is negative and zero otherwise, so AND with that mask gives SMIN (x, 0) and BIC with it gives SMAX (x, 0).)

Given that we already have well-fitting TARGET_CSSC patterns and expanders for the min/max codes in the backend, this patch does some minor refactoring to ensure we emit the right SMAX/SMIN RTL codes for TARGET_CSSC, fall back to the generic expanders, or emit a simple SMIN/SMAX with 0 RTX for !TARGET_CSSC that is now matched by a separate pattern.

Bootstrapped and tested on aarch64-none-linux-gnu.

gcc/ChangeLog:

* config/aarch64/aarch64.md (aarch64_umax<mode>3_insn): Delete.
(umax<mode>3): Emit raw UMAX RTL instead of going through gen_ function for umax.
(<optab><mode>3): New define_expand for MAXMIN_NOUMAX codes.
(*aarch64_<optab><mode>3_zero): Define.
(*aarch64_<optab><mode>3_cssc): Likewise.
* config/aarch64/iterators.md (maxminand): New code attribute.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sminmax-asr_1.c: New test.
This commit is contained in:
parent
573624ec90
commit
88195141ae
3 changed files with 97 additions and 15 deletions
|
@ -4412,17 +4412,6 @@
|
|||
[(set_attr "type" "csel")]
|
||||
)
|
||||
|
||||
(define_insn "aarch64_umax<mode>3_insn"
|
||||
[(set (match_operand:GPI 0 "register_operand" "=r,r")
|
||||
(umax:GPI (match_operand:GPI 1 "register_operand" "r,r")
|
||||
(match_operand:GPI 2 "aarch64_uminmax_operand" "r,Uum")))]
|
||||
"TARGET_CSSC"
|
||||
"@
|
||||
umax\\t%<w>0, %<w>1, %<w>2
|
||||
umax\\t%<w>0, %<w>1, %2"
|
||||
[(set_attr "type" "alu_sreg,alu_imm")]
|
||||
)
|
||||
|
||||
;; If X can be loaded by a single CNT[BHWD] instruction,
|
||||
;;
|
||||
;; A = UMAX (B, X)
|
||||
|
@ -4466,8 +4455,8 @@
|
|||
operands[1] = force_reg (<MODE>mode, operands[1]);
|
||||
if (!aarch64_uminmax_operand (operands[2], <MODE>mode))
|
||||
operands[2] = force_reg (<MODE>mode, operands[2]);
|
||||
emit_insn (gen_aarch64_umax<mode>3_insn (operands[0], operands[1],
|
||||
operands[2]));
|
||||
emit_move_insn (operands[0], gen_rtx_UMAX (<MODE>mode, operands[1],
|
||||
operands[2]));
|
||||
DONE;
|
||||
}
|
||||
else
|
||||
|
@ -6759,9 +6748,30 @@
|
|||
[(set_attr "type" "ffarith<stype>")]
|
||||
)
|
||||
|
||||
(define_insn "<optab><mode>3"
|
||||
;; Expander for integer smin, smax, umin. Mainly used to generate
|
||||
;; straightforward RTL for TARGET_CSSC. When that is not available
|
||||
;; FAIL and let the generic expanders generate the CMP + CSEL sequences,
|
||||
;; except for the SMIN and SMAX with zero cases, for which we have a
|
||||
;; single instruction even for the base architecture.
|
||||
(define_expand "<optab><mode>3"
|
||||
[(set (match_operand:GPI 0 "register_operand")
|
||||
(MAXMIN_NOUMAX:GPI
|
||||
(match_operand:GPI 1 "register_operand")
|
||||
(match_operand:GPI 2 "aarch64_<su>minmax_operand")))]
|
||||
""
|
||||
{
|
||||
if (!TARGET_CSSC)
|
||||
{
|
||||
if (operands[2] != CONST0_RTX (<MODE>mode)
|
||||
|| !(<CODE> == SMAX || <CODE> == SMIN))
|
||||
FAIL;
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_<optab><mode>3_cssc"
|
||||
[(set (match_operand:GPI 0 "register_operand" "=r,r")
|
||||
(MAXMIN_NOUMAX:GPI (match_operand:GPI 1 "register_operand" "r,r")
|
||||
(MAXMIN:GPI (match_operand:GPI 1 "register_operand" "r,r")
|
||||
(match_operand:GPI 2 "aarch64_<su>minmax_operand" "r,U<su>m")))]
|
||||
"TARGET_CSSC"
|
||||
"@
|
||||
|
@ -6770,6 +6780,16 @@
|
|||
[(set_attr "type" "alu_sreg,alu_imm")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_<optab><mode>3_zero"
|
||||
[(set (match_operand:GPI 0 "register_operand" "=r")
|
||||
(FMAXMIN:GPI
|
||||
(match_operand:GPI 1 "register_operand" "r")
|
||||
(const_int 0)))]
|
||||
""
|
||||
"<maxminand>\\t%<w>0, %<w>1, %<w>1, asr <sizem1>";
|
||||
[(set_attr "type" "logic_shift_imm")]
|
||||
)
|
||||
|
||||
;; Given that smax/smin do not specify the result when either input is NaN,
|
||||
;; we could use either FMAXNM or FMAX for smax, and either FMINNM or FMIN
|
||||
;; for smin.
|
||||
|
|
|
@ -2435,6 +2435,8 @@
|
|||
(umax "max")
|
||||
(umin "min")])
|
||||
|
||||
(define_code_attr maxminand [(smax "bic") (smin "and")])
|
||||
|
||||
;; MLA/MLS attributes.
|
||||
(define_code_attr as [(ss_plus "a") (ss_minus "s")])
|
||||
|
||||
|
|
60
gcc/testsuite/gcc.target/aarch64/sminmax-asr_1.c
Normal file
60
gcc/testsuite/gcc.target/aarch64/sminmax-asr_1.c
Normal file
|
@ -0,0 +1,60 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-O --save-temps" } */
|
||||
/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#pragma GCC target "+nocssc"
|
||||
|
||||
#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
|
||||
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
|
||||
|
||||
|
||||
/*
|
||||
** minzero:
|
||||
** and w0, w0, w0, asr #31
|
||||
** ret
|
||||
*/
|
||||
|
||||
int32_t
|
||||
minzero (int32_t a)
|
||||
{
|
||||
return MIN (a, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
** maxzero:
|
||||
** bic w0, w0, w0, asr #31
|
||||
** ret
|
||||
*/
|
||||
|
||||
int32_t
|
||||
maxzero (int32_t a)
|
||||
{
|
||||
return MAX (a, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
** minzerol:
|
||||
** and x0, x0, x0, asr #63
|
||||
** ret
|
||||
*/
|
||||
|
||||
int64_t
|
||||
minzerol (int64_t a)
|
||||
{
|
||||
return MIN (a, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
** maxzerol:
|
||||
** bic x0, x0, x0, asr #63
|
||||
** ret
|
||||
*/
|
||||
|
||||
int64_t
|
||||
maxzerol (int64_t a)
|
||||
{
|
||||
return MAX (a, 0);
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue