Add pre_reload splitter to detect fp min/max pattern.

We have ix86_expand_sse_fp_minmax to detect min/max semantics, but
it requires rtx_equal_p for cmp_op0/cmp_op1 and if_true/if_false. For
the testcase in the PR, there's an extra move from cmp_op0 to if_true,
so ix86_expand_sse_fp_minmax fails to match.

This patch adds pre_reload splitter to detect the min/max pattern.

Operands order in MINSS matters for signed zero and NANs, since the
instruction always returns second operand when any operand is NAN or
both operands are zero.

gcc/ChangeLog:

	PR target/110170
	* config/i386/i386.md (*ieee_max<mode>3_1): New pre_reload
	splitter to detect fp max pattern.
	(*ieee_min<mode>3_1): Ditto, but for fp min pattern.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr110170.C: New test.
	* gcc.target/i386/pr110170.c: New test.
This commit is contained in:
liuhongt 2023-07-03 18:19:19 +08:00
parent 361a6fc4bc
commit d41a57c46d
3 changed files with 154 additions and 0 deletions

View file

@ -23163,6 +23163,49 @@
(set_attr "type" "sseadd")
(set_attr "mode" "<MODE>")])
;; Operands order in min/max instruction matters for signed zero and NANs.
;; Pre-reload splitter for the fp max idiom.  It matches an UNSPEC_BLENDV
;; whose third element is the comparison (lt op3 op4) and whose two data
;; operands are the same registers in the same order as the comparison
;; (op1 == op3 and op2 == op4), and rewrites it as UNSPEC_IEEE_MAX with the
;; data operands swapped to (op2, op1).
;; NOTE(review): the swap presumably puts the value that must be returned
;; for NaN / both-zero inputs into the second operand slot -- confirm
;; against the UNSPEC_BLENDV operand convention.
(define_insn_and_split "*ieee_max<mode>3_1"
[(set (match_operand:MODEF 0 "register_operand")
(unspec:MODEF
[(match_operand:MODEF 1 "register_operand")
(match_operand:MODEF 2 "register_operand")
;; Selector: a less-than comparison of operands 3 and 4.
(lt:MODEF
(match_operand:MODEF 3 "register_operand")
(match_operand:MODEF 4 "register_operand"))]
UNSPEC_BLENDV))]
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& (rtx_equal_p (operands[1], operands[3])
&& rtx_equal_p (operands[2], operands[4]))
&& ix86_pre_reload_split ()"
;; No assembler template: the insn is always split ("&& 1" reuses the insn
;; condition as the split condition).
"#"
"&& 1"
[(set (match_dup 0)
(unspec:MODEF
[(match_dup 2)
(match_dup 1)]
UNSPEC_IEEE_MAX))])
;; Pre-reload splitter for the fp min idiom.  It matches an UNSPEC_BLENDV
;; whose third element is the comparison (lt op3 op4) and whose two data
;; operands are the comparison operands in the opposite order
;; (op1 == op4 and op2 == op3), and rewrites it as UNSPEC_IEEE_MIN with the
;; data operands swapped to (op2, op1).
;; NOTE(review): operand order matters for signed zeros and NaNs; the swap
;; presumably keeps the original select semantics -- confirm against the
;; UNSPEC_BLENDV operand convention.
(define_insn_and_split "*ieee_min<mode>3_1"
[(set (match_operand:MODEF 0 "register_operand")
(unspec:MODEF
[(match_operand:MODEF 1 "register_operand")
(match_operand:MODEF 2 "register_operand")
;; Selector: a less-than comparison of operands 3 and 4.
(lt:MODEF
(match_operand:MODEF 3 "register_operand")
(match_operand:MODEF 4 "register_operand"))]
UNSPEC_BLENDV))]
"SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
&& (rtx_equal_p (operands[1], operands[4])
&& rtx_equal_p (operands[2], operands[3]))
&& ix86_pre_reload_split ()"
;; No assembler template: the insn is always split ("&& 1" reuses the insn
;; condition as the split condition).
"#"
"&& 1"
[(set (match_dup 0)
(unspec:MODEF
[(match_dup 2)
(match_dup 1)]
UNSPEC_IEEE_MIN))])
;; Make two stack loads independent:
;; fld aa fld aa
;; fld %st(0) -> fld bb

View file

@ -0,0 +1,90 @@
/* { dg-do run { target sse4 } } */
/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
#include <math.h>
#ifndef CHECK_H
#define CHECK_H "sse4_1-check.h"
#endif
#ifndef TEST
#define TEST sse4_1_test
#endif
#include CHECK_H
/* Conditional swap exercised by every sub-test below.
   With r = (*__x < *__y):
     *__x receives r ? *__x : *__y
     *__y receives r ? *__y : *__x
   so when the comparison is false (equal values, or either operand is a
   NaN) the two values are exchanged.  Both new values are computed from
   the contents read before *__x is overwritten.
   NOTE(review): the exact statement shape is presumably what the compiler
   must recognise as min/max -- do not restructure.  */
void
__attribute__((noinline))
__cond_swap(double* __x, double* __y) {
bool __r = (*__x < *__y);
auto __tmp = __r ? *__x : *__y;
*__y = __r ? *__y : *__x;
*__x = __tmp;
}
/* Signed-zero case: -0.0 < 0.0 is false, so __cond_swap must exchange the
   two zeros.  `==` treats -0.0 and 0.0 as equal, so the sign of each
   result is checked explicitly with signbit; otherwise a wrong operand
   order in the generated min/max would go unnoticed.  */
auto test1() {
    double neg_zero = -0.0;
    double pos_zero = 0.0;
    __cond_swap(&neg_zero, &pos_zero);
    /* After the swap the second argument holds -0.0 and the first +0.0.  */
    return pos_zero == 0.0 && signbit(pos_zero) == 1
           && neg_zero == 0.0 && signbit(neg_zero) == 0;
}
/* Positive NaN passed as the second argument, 1.0 as the first: expects the
   NaN (sign bit clear) to end up in the first slot and 1.0 in the second.  */
auto test1r() {
    double first = 1.0;
    double second = NAN;
    __cond_swap(&first, &second);
    const bool first_is_positive_nan = isnan(first) && signbit(first) == 0;
    return first_is_positive_nan && second == 1.0;
}
/* Positive NaN passed as the first argument, -1.0 as the second: expects the
   NaN (sign bit clear) to end up in the second slot and -1.0 in the first.  */
auto test2() {
    double first = NAN;
    double second = -1.0;
    __cond_swap(&first, &second);
    const bool second_is_positive_nan = isnan(second) && signbit(second) == 0;
    return second_is_positive_nan && first == -1.0;
}
/* Positive NaN passed as the second argument, -1.0 as the first: expects the
   NaN (sign bit clear) to end up in the first slot and -1.0 in the second.  */
auto test2r() {
    double first = -1.0;
    double second = NAN;
    __cond_swap(&first, &second);
    const bool first_is_positive_nan = isnan(first) && signbit(first) == 0;
    return first_is_positive_nan && second == -1.0;
}
/* Negative NaN passed as the first argument, 1.0 as the second: expects the
   NaN (sign bit set) to end up in the second slot and 1.0 in the first.  */
auto test3() {
    double first = -NAN;
    double second = 1.0;
    __cond_swap(&first, &second);
    const bool second_is_negative_nan = isnan(second) && signbit(second) == 1;
    return second_is_negative_nan && first == 1.0;
}
/* Negative NaN passed as the second argument, 1.0 as the first: expects the
   NaN (sign bit set) to end up in the first slot and 1.0 in the second.  */
auto test3r() {
    double first = 1.0;
    double second = -NAN;
    __cond_swap(&first, &second);
    const bool first_is_negative_nan = isnan(first) && signbit(first) == 1;
    return first_is_negative_nan && second == 1.0;
}
/* Negative NaN passed as the first argument, -1.0 as the second: expects the
   NaN (sign bit set) to end up in the second slot and -1.0 in the first.  */
auto test4() {
    double first = -NAN;
    double second = -1.0;
    __cond_swap(&first, &second);
    const bool second_is_negative_nan = isnan(second) && signbit(second) == 1;
    return second_is_negative_nan && first == -1.0;
}
/* Negative NaN passed as the second argument, -1.0 as the first: expects the
   NaN (sign bit set) to end up in the first slot and -1.0 in the second.  */
auto test4r() {
    double first = -1.0;
    double second = -NAN;
    __cond_swap(&first, &second);
    const bool first_is_negative_nan = isnan(first) && signbit(first) == 1;
    return first_is_negative_nan && second == -1.0;
}
static void
TEST()
{
if (
!test1() || !test1r()
|| !test2() || !test2r()
|| !test3() || !test4r()
|| !test4() || !test4r()
) __builtin_abort();
}

View file

@ -0,0 +1,21 @@
/* { dg-do compile } */
/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
/* Ideally cond_swap_df is also optimized to minsd/maxsd. */
/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
/* Double-precision conditional swap.  With r = (*__x < *__y), *__x receives
   r ? *__x : *__y and *__y receives r ? *__y : *__x, so the values are
   exchanged whenever the comparison is false.  The statement shape is what
   the assembler scans above depend on; keep it unchanged.  */
void __cond_swap_df(double* __x, double* __y) {
_Bool __r = (*__x < *__y);
double __tmp = __r ? *__x : *__y;
*__y = __r ? *__y : *__x;
*__x = __tmp;
}
/* Single-precision conditional swap.  With r = (*__x < *__y), *__x receives
   r ? *__x : *__y and *__y receives r ? *__y : *__x, so the values are
   exchanged whenever the comparison is false.  The statement shape is what
   the assembler scans above depend on; keep it unchanged.  */
void __cond_swap_sf(float* __x, float* __y) {
_Bool __r = (*__x < *__y);
float __tmp = __r ? *__x : *__y;
*__y = __r ? *__y : *__x;
*__x = __tmp;
}