i386: Support cstorebf4 with native bf16 comi
We recently added support for cbranchbf4 using the AVX10_2 native bf16 comi instructions, so do the same for cstorebf4. gcc/ChangeLog: * config/i386/i386.md (cstorebf4): Use vcomsbf16 under TARGET_AVX10_2_256 and -fno-trapping-math. (cbranchbf4): Adjust formatting. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-comibf-3.c: New test. * gcc.target/i386/avx10_2-comibf-4.c: Likewise.
This commit is contained in:
parent
4473cf8409
commit
50ecb6e960
3 changed files with 82 additions and 10 deletions
|
@ -1818,10 +1818,8 @@
|
|||
"TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
|
||||
{
|
||||
if (TARGET_AVX10_2_256 && !flag_trapping_math)
|
||||
{
|
||||
ix86_expand_branch (GET_CODE (operands[0]),
|
||||
operands[1], operands[2], operands[3]);
|
||||
}
|
||||
ix86_expand_branch (GET_CODE (operands[0]),
|
||||
operands[1], operands[2], operands[3]);
|
||||
else
|
||||
{
|
||||
rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[1]);
|
||||
|
@ -1860,12 +1858,18 @@
|
|||
(const_int 0)]))]
|
||||
"TARGET_80387 || (SSE_FLOAT_MODE_P (SFmode) && TARGET_SSE_MATH)"
|
||||
{
|
||||
rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
|
||||
rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
|
||||
rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
|
||||
op1, op2, SFmode, 0, 1);
|
||||
if (!rtx_equal_p (res, operands[0]))
|
||||
emit_move_insn (operands[0], res);
|
||||
if (TARGET_AVX10_2_256 && !flag_trapping_math)
|
||||
ix86_expand_setcc (operands[0], GET_CODE (operands[1]),
|
||||
operands[2], operands[3]);
|
||||
else
|
||||
{
|
||||
rtx op1 = ix86_expand_fast_convert_bf_to_sf (operands[2]);
|
||||
rtx op2 = ix86_expand_fast_convert_bf_to_sf (operands[3]);
|
||||
rtx res = emit_store_flag_force (operands[0], GET_CODE (operands[1]),
|
||||
op1, op2, SFmode, 0, 1);
|
||||
if (!rtx_equal_p (res, operands[0]))
|
||||
emit_move_insn (operands[0], res);
|
||||
}
|
||||
DONE;
|
||||
})
|
||||
|
||||
|
|
27
gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c
Normal file
27
gcc/testsuite/gcc.target/i386/avx10_2-comibf-3.c
Normal file
|
@ -0,0 +1,27 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=x86-64-v3 -O2" } */
|
||||
|
||||
/* { dg-final { scan-assembler-times "vcomsbf16\[ \\t\]+\[^{}\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 6 } } */
|
||||
/* { dg-final { scan-assembler-times "set\[aeglnb\]+" 6 } } */
|
||||
|
||||
/* Per-function attributes for the functions under test: keep them out of
   line, compile them for the "avx10.2" target, and disable trapping math
   for that function only.  */
#define AVX10_ATTR \
|
||||
__attribute__((noinline, __target__("avx10.2"), optimize("no-trapping-math")))
|
||||
|
||||
/* Return 1 when A equals B and C is less than D, otherwise 0.
   Built with AVX10_ATTR; contains two __bf16 comparisons (== and <).  */
AVX10_ATTR
|
||||
int foo1_avx10 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
|
||||
{
|
||||
return a == b && c < d;
|
||||
}
|
||||
|
||||
/* Return 1 when A is greater than B or C differs from D, otherwise 0.
   Built with AVX10_ATTR; contains two __bf16 comparisons (> and !=).  */
AVX10_ATTR
|
||||
int foo2_avx10 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
|
||||
{
|
||||
return a > b || c != d;
|
||||
}
|
||||
|
||||
/* Return the product of the two comparison results (A >= B) and (C <= D),
   i.e. 1 only when both hold, otherwise 0.  Built with AVX10_ATTR.  */
AVX10_ATTR
|
||||
int foo3_avx10 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
|
||||
{
|
||||
return (a >= b) * (c <= d);
|
||||
}
|
||||
|
41
gcc/testsuite/gcc.target/i386/avx10_2-comibf-4.c
Normal file
41
gcc/testsuite/gcc.target/i386/avx10_2-comibf-4.c
Normal file
|
@ -0,0 +1,41 @@
|
|||
/* { dg-do run { target { avx10_2 } } } */
|
||||
/* { dg-options "-march=x86-64-v3 -O2" } */
|
||||
|
||||
#include "avx10_2-comibf-3.c"
|
||||
|
||||
/* Reference for foo1_avx10: the same expression, but carrying only the
   noinline attribute, so it is built with the file-level options rather
   than the avx10.2 target attribute.  */
__attribute__((noinline))
|
||||
int foo1 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
|
||||
{
|
||||
return a == b && c < d;
|
||||
}
|
||||
|
||||
/* Reference for foo2_avx10: the same expression, but carrying only the
   noinline attribute, so it is built with the file-level options rather
   than the avx10.2 target attribute.  */
__attribute__((noinline))
|
||||
int foo2 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
|
||||
{
|
||||
return a > b || c != d;
|
||||
}
|
||||
|
||||
/* Reference for foo3_avx10: the same expression, but carrying only the
   noinline attribute, so it is built with the file-level options rather
   than the avx10.2 target attribute.  */
__attribute__((noinline))
|
||||
int foo3 (__bf16 a, __bf16 b, __bf16 c, __bf16 d)
|
||||
{
|
||||
return (a >= b) * (c <= d);
|
||||
}
|
||||
|
||||
|
||||
/* Runtime check: each fooN_avx10 function must return the same value as its
   reference fooN for the same arguments; abort on any mismatch.  */
int main (void)
|
||||
{
|
||||
/* Exit successfully without testing when the running CPU does not report
   avx10.2 support.  */
if (!__builtin_cpu_supports ("avx10.2"))
|
||||
return 0;
|
||||
|
||||
__bf16 a = 0.5bf16, b = -0.25bf16, c = 1.75bf16, d = -0.125bf16;
|
||||
|
||||
/* The three checks pass the same four values in rotated order.  */
if (foo1_avx10 (a, b, c, d) != foo1 (a, b, c, d))
|
||||
__builtin_abort ();
|
||||
|
||||
if (foo2_avx10 (b, c, d, a) != foo2 (b, c, d, a))
|
||||
__builtin_abort ();
|
||||
|
||||
if (foo3_avx10 (c, d, a, b) != foo3 (c, d, a, b))
|
||||
__builtin_abort ();
|
||||
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue