diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e47ced1bb70..621cdd939ea 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -23163,6 +23163,55 @@
    (set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+;; Operands order in min/max instruction matters for signed zero and NANs.
+;; Match a BLENDV whose selector is (lt op3 op4) with op3 == op1 and
+;; op4 == op2 (the insn condition also requires ix86_pre_reload_split ()),
+;; and split it into UNSPEC_IEEE_MAX with the operands given as (op2, op1).
+(define_insn_and_split "*ieee_max<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+        (unspec:MODEF
+          [(match_operand:MODEF 1 "register_operand")
+           (match_operand:MODEF 2 "register_operand")
+           (lt:MODEF
+             (match_operand:MODEF 3 "register_operand")
+             (match_operand:MODEF 4 "register_operand"))]
+          UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && (rtx_equal_p (operands[1], operands[3])
+       && rtx_equal_p (operands[2], operands[4]))
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+        (unspec:MODEF
+          [(match_dup 2)
+           (match_dup 1)]
+          UNSPEC_IEEE_MAX))])
+
+;; Same shape as the pattern above, but the selector compares the operands
+;; the other way round (op3 == op2 and op4 == op1), so the split emits
+;; UNSPEC_IEEE_MIN with the operands given as (op2, op1).
+(define_insn_and_split "*ieee_min<mode>3_1"
+  [(set (match_operand:MODEF 0 "register_operand")
+        (unspec:MODEF
+          [(match_operand:MODEF 1 "register_operand")
+           (match_operand:MODEF 2 "register_operand")
+           (lt:MODEF
+             (match_operand:MODEF 3 "register_operand")
+             (match_operand:MODEF 4 "register_operand"))]
+          UNSPEC_BLENDV))]
+  "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH
+   && (rtx_equal_p (operands[1], operands[4])
+       && rtx_equal_p (operands[2], operands[3]))
+   && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+        (unspec:MODEF
+          [(match_dup 2)
+           (match_dup 1)]
+          UNSPEC_IEEE_MIN))])
+
 ;; Make two stack loads independent:
 ;;   fld aa              fld aa
 ;;   fld %st(0)     ->   fld bb
diff --git a/gcc/testsuite/g++.target/i386/pr110170.C b/gcc/testsuite/g++.target/i386/pr110170.C
new file mode 100644
index 00000000000..e638b12a5ee
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr110170.C
@@ -0,0 +1,92 @@
+/* { dg-do run { target sse4 } } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse -std=gnu++20" } */
+#include <math.h>
+
+#ifndef CHECK_H
+#define CHECK_H "sse4_1-check.h"
+#endif
+
+#ifndef TEST
+#define TEST sse4_1_test
+#endif
+
+#include CHECK_H
+
+/* Swap *__x and *__y unless *__x < *__y.  */
+void
+__attribute__((noinline))
+__cond_swap(double* __x, double* __y) {
+  bool __r = (*__x < *__y);
+  auto __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+auto test1() {
+  double nan = -0.0;
+  double x = 0.0;
+  __cond_swap(&nan, &x);
+  return x == -0.0 && nan == 0.0;
+}
+
+auto test1r() {
+  double nan = NAN;
+  double x = 1.0;
+  __cond_swap(&x, &nan);
+  return isnan(x) && signbit(x) == 0 && nan == 1.0;
+}
+
+auto test2() {
+  double nan = NAN;
+  double x = -1.0;
+  __cond_swap(&nan, &x);
+  return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test2r() {
+  double nan = NAN;
+  double x = -1.0;
+  __cond_swap(&x, &nan);
+  return isnan(x) && signbit(x) == 0 && nan == -1.0;
+}
+
+auto test3() {
+  double nan = -NAN;
+  double x = 1.0;
+  __cond_swap(&nan, &x);
+  return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test3r() {
+  double nan = -NAN;
+  double x = 1.0;
+  __cond_swap(&x, &nan);
+  return isnan(x) && signbit(x) == 1 && nan == 1.0;
+}
+
+auto test4() {
+  double nan = -NAN;
+  double x = -1.0;
+  __cond_swap(&nan, &x);
+  return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+auto test4r() {
+  double nan = -NAN;
+  double x = -1.0;
+  __cond_swap(&x, &nan);
+  return isnan(x) && signbit(x) == 1 && nan == -1.0;
+}
+
+
+/* Abort if any signed-zero or NaN case gives an unexpected result.  */
+static void
+TEST()
+{
+  if (
+      !test1() || !test1r()
+      || !test2() || !test2r()
+      || !test3() || !test3r()
+      || !test4() || !test4r()
+      ) __builtin_abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr110170.c b/gcc/testsuite/gcc.target/i386/pr110170.c
new file mode 100644
index 00000000000..c72f73398a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110170.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options " -O2 -msse4.1 -mfpmath=sse" } */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 2 { target { ! ia32 } } } } */
+/* Ideally cond_swap_df is also optimized to minsd/maxsd.  */
+/* { dg-final { scan-assembler-times {(?n)mins[sd]} 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times {(?n)maxs[sd]} 1 { target ia32 } } } */
+
+/* Both functions swap *__x and *__y unless *__x < *__y.  */
+void __cond_swap_df(double* __x, double* __y) {
+  _Bool __r = (*__x < *__y);
+  double __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}
+
+void __cond_swap_sf(float* __x, float* __y) {
+  _Bool __r = (*__x < *__y);
+  float __tmp = __r ? *__x : *__y;
+  *__y = __r ? *__y : *__x;
+  *__x = __tmp;
+}