Add pre_reload splitter to detect fp min/max pattern.
We have ix86_expand_sse_fp_minmax to detect min/max semantics, but it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false. For the testcase in the PR there is an extra move from cmp_op0 to if_true, so ix86_expand_sse_fp_minmax fails to match. This patch adds a pre_reload splitter to detect the min/max pattern. Operand order in MINSS matters for signed zeros and NaNs, since the instruction always returns the second operand when any operand is a NaN or both operands are zero. gcc/ChangeLog: PR target/110170 * config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload splitter to detect fp max pattern. (*ieee_min<mode>3_1): Ditto, but for fp min pattern. gcc/testsuite/ChangeLog: * g++.target/i386/pr110170.C: New test. * gcc.target/i386/pr110170.c: New test.
This commit is contained in:
parent
361a6fc4bc
commit
d41a57c46d
3 changed files with 154 additions and 0 deletions
|
@ -23163,6 +23163,49 @@
|
|||
(set_attr "type" "sseadd")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
;; Operands order in min/max instruction matters for signed zero and NANs.
;; *ieee_max<mode>3_1: pre-reload splitter that recognizes an UNSPEC_BLENDV
;; whose mask is (lt op3 op4) with op3 equal to op1 and op4 equal to op2,
;; and rewrites it as UNSPEC_IEEE_MAX.  The split emits the operands in
;; swapped order (op2, op1); do not "simplify" that to (op1, op2).
;; NOTE(review): presumably the blend picks op2 where the mask is set, making
;; this op1 < op2 ? op2 : op1 -- confirm against the UNSPEC_BLENDV patterns.
|
||||
(define_insn_and_split "*ieee_max<mode>3_1"
|
||||
[(set (match_operand:MODEF 0 "register_operand")
|
||||
(unspec:MODEF
|
||||
[(match_operand:MODEF 1 "register_operand")
|
||||
(match_operand:MODEF 2 "register_operand")
|
||||
(lt:MODEF
|
||||
(match_operand:MODEF 3 "register_operand")
|
||||
(match_operand:MODEF 4 "register_operand"))]
|
||||
UNSPEC_BLENDV))]
|
||||
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& (rtx_equal_p (operands[1], operands[3])
|
||||
&& rtx_equal_p (operands[2], operands[4]))
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(unspec:MODEF
|
||||
[(match_dup 2)
|
||||
(match_dup 1)]
|
||||
UNSPEC_IEEE_MAX))])
|
||||
|
||||
;; *ieee_min<mode>3_1: pre-reload splitter that recognizes an UNSPEC_BLENDV
;; whose mask is (lt op3 op4) with op3 equal to op2 and op4 equal to op1
;; (i.e. the comparison operands are crossed relative to the blend inputs),
;; and rewrites it as UNSPEC_IEEE_MIN.  The split emits the operands in
;; swapped order (op2, op1); the order matters for signed zeros and NaNs.
;; NOTE(review): presumably the blend picks op2 where the mask is set, making
;; this op2 < op1 ? op2 : op1 -- confirm against the UNSPEC_BLENDV patterns.
(define_insn_and_split "*ieee_min<mode>3_1"
|
||||
[(set (match_operand:MODEF 0 "register_operand")
|
||||
(unspec:MODEF
|
||||
[(match_operand:MODEF 1 "register_operand")
|
||||
(match_operand:MODEF 2 "register_operand")
|
||||
(lt:MODEF
|
||||
(match_operand:MODEF 3 "register_operand")
|
||||
(match_operand:MODEF 4 "register_operand"))]
|
||||
UNSPEC_BLENDV))]
|
||||
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
|
||||
&& (rtx_equal_p (operands[1], operands[4])
|
||||
&& rtx_equal_p (operands[2], operands[3]))
|
||||
&& ix86_pre_reload_split ()"
|
||||
"#"
|
||||
"&& 1"
|
||||
[(set (match_dup 0)
|
||||
(unspec:MODEF
|
||||
[(match_dup 2)
|
||||
(match_dup 1)]
|
||||
UNSPEC_IEEE_MIN))])
|
||||
|
||||
;; Make two stack loads independent:
|
||||
;; fld aa fld aa
|
||||
;; fld %st(0) -> fld bb
|
||||
|
|
90
gcc/testsuite/g++.target/i386/pr110170.C
Normal file
90
gcc/testsuite/g++.target/i386/pr110170.C
Normal file
|
@ -0,0 +1,90 @@
|
|||
/* { dg-do run { target sse4 } } */
|
||||
/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
|
||||
#include <math.h>
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "sse4_1-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST sse4_1_test
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
|
||||
/* Conditional swap written with two selects on the same comparison.
   With r = (*__x < *__y): *__x receives r ? old *__x : old *__y and
   *__y receives r ? old *__y : old *__x, so the pair is left unchanged
   when the comparison is true and exchanged when it is false (including
   when either value is a NaN).  Marked noinline so it stays out of line.
   NOTE(review): the exact statement shape is presumably what triggers the
   min/max splitters under test -- do not restructure.  */
void
|
||||
__attribute__((noinline))
|
||||
__cond_swap(double* __x, double* __y) {
|
||||
bool __r = (*__x < *__y);
|
||||
/* Saved first because *__x is still read by the next statement.  */
auto __tmp = __r ? *__x : *__y;
|
||||
*__y = __r ? *__y : *__x;
|
||||
*__x = __tmp;
|
||||
}
|
||||
|
||||
auto test1() {
|
||||
double nan = -0.0;
|
||||
double x = 0.0;
|
||||
__cond_swap(&nan, &x);
|
||||
return x == -0.0 && nan == 0.0;
|
||||
}
|
||||
|
||||
/* NAN passed as the second argument, 1.0 as the first: 1.0 < NAN is false,
   so the values are exchanged.  Expects x to become a NaN with a clear sign
   bit and nan to become 1.0.  */
auto test1r() {
|
||||
double nan = NAN;
|
||||
double x = 1.0;
|
||||
__cond_swap(&x, &nan);
|
||||
return isnan(x) && signbit(x) == 0 && nan == 1.0;
|
||||
}
|
||||
|
||||
/* NAN passed as the first argument, -1.0 as the second: NAN < -1.0 is false,
   so the values are exchanged.  Expects x to become a NaN with a clear sign
   bit and nan to become -1.0.  */
auto test2() {
|
||||
double nan = NAN;
|
||||
double x = -1.0;
|
||||
__cond_swap(&nan, &x);
|
||||
return isnan(x) && signbit(x) == 0 && nan == -1.0;
|
||||
}
|
||||
|
||||
/* Same values as test2 with the argument order reversed (-1.0 first, NAN
   second): -1.0 < NAN is false, so the values are exchanged.  Expects x to
   become a NaN with a clear sign bit and nan to become -1.0.  */
auto test2r() {
|
||||
double nan = NAN;
|
||||
double x = -1.0;
|
||||
__cond_swap(&x, &nan);
|
||||
return isnan(x) && signbit(x) == 0 && nan == -1.0;
|
||||
}
|
||||
|
||||
/* -NAN passed as the first argument, 1.0 as the second: the comparison is
   false, so the values are exchanged.  Expects x to become a NaN with the
   sign bit set and nan to become 1.0.  */
auto test3() {
|
||||
double nan = -NAN;
|
||||
double x = 1.0;
|
||||
__cond_swap(&nan, &x);
|
||||
return isnan(x) && signbit(x) == 1 && nan == 1.0;
|
||||
}
|
||||
|
||||
/* Same values as test3 with the argument order reversed (1.0 first, -NAN
   second): the comparison is false, so the values are exchanged.  Expects x
   to become a NaN with the sign bit set and nan to become 1.0.  */
auto test3r() {
|
||||
double nan = -NAN;
|
||||
double x = 1.0;
|
||||
__cond_swap(&x, &nan);
|
||||
return isnan(x) && signbit(x) == 1 && nan == 1.0;
|
||||
}
|
||||
|
||||
/* -NAN passed as the first argument, -1.0 as the second: the comparison is
   false, so the values are exchanged.  Expects x to become a NaN with the
   sign bit set and nan to become -1.0.  */
auto test4() {
|
||||
double nan = -NAN;
|
||||
double x = -1.0;
|
||||
__cond_swap(&nan, &x);
|
||||
return isnan(x) && signbit(x) == 1 && nan == -1.0;
|
||||
}
|
||||
|
||||
/* Same values as test4 with the argument order reversed (-1.0 first, -NAN
   second): the comparison is false, so the values are exchanged.  Expects x
   to become a NaN with the sign bit set and nan to become -1.0.  */
auto test4r() {
|
||||
double nan = -NAN;
|
||||
double x = -1.0;
|
||||
__cond_swap(&x, &nan);
|
||||
return isnan(x) && signbit(x) == 1 && nan == -1.0;
|
||||
}
|
||||
|
||||
|
||||
/* Test body, defined under the name given by the TEST macro (sse4_1_test
   unless overridden).  NOTE(review): presumably invoked by the harness in
   CHECK_H -- confirm against sse4_1-check.h.
   Aborts if any scenario returns false.  Every testN/testNr pair is run
   exactly once; previously test3r was skipped and test4r was run twice.  */
static void
|
||||
TEST()
|
||||
{
|
||||
if (
|
||||
!test1() || !test1r()
|
||||
|| !test2() || !test2r()
|
||||
|| !test3() || !test3r()
|
||||
|| !test4() || !test4r()
|
||||
) __builtin_abort();
|
||||
}
|
21
gcc/testsuite/gcc.target/i386/pr110170.c
Normal file
21
gcc/testsuite/gcc.target/i386/pr110170.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
|
||||
/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
|
||||
/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
|
||||
/* Ideally cond_swap_df is also optimized to minsd/maxsd. */
|
||||
/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
|
||||
/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
|
||||
|
||||
/* Conditional swap on doubles written as two selects on one comparison:
   the pair is left unchanged when *__x < *__y and exchanged otherwise.
   This is one of the two functions whose assembly the dg-final
   scan-assembler-times directives in this file count min/max instructions
   in; per the comment next to those directives, on ia32 this double variant
   is not yet expected to become minsd/maxsd.  */
void __cond_swap_df(double* __x, double* __y) {
|
||||
_Bool __r = (*__x < *__y);
|
||||
/* Saved first because *__x is still read by the next statement.  */
double __tmp = __r ? *__x : *__y;
|
||||
*__y = __r ? *__y : *__x;
|
||||
*__x = __tmp;
|
||||
}
|
||||
|
||||
/* Single-precision counterpart of the conditional swap: the pair is left
   unchanged when *__x < *__y and exchanged otherwise.  Its assembly is
   covered by the dg-final scan-assembler-times directives in this file,
   which count mins[sd]/maxs[sd] occurrences.  */
void __cond_swap_sf(float* __x, float* __y) {
|
||||
_Bool __r = (*__x < *__y);
|
||||
/* Saved first because *__x is still read by the next statement.  */
float __tmp = __r ? *__x : *__y;
|
||||
*__y = __r ? *__y : *__x;
|
||||
*__x = __tmp;
|
||||
}
|
Loading…
Add table
Reference in a new issue