i386: Support vectorized BF16 smaxmin with AVX10.2 instructions
gcc/ChangeLog: * config/i386/sse.md (<code><mode>3): New define expand pattern for BF smaxmin. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-512-bf-vector-smaxmin-1.c: New test. * gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c: New test.
This commit is contained in:
parent
6d294fb8ac
commit
29ef601973
3 changed files with 63 additions and 0 deletions
|
@ -31901,6 +31901,13 @@
|
|||
"vscalefpbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
|
||||
[(set_attr "prefix" "evex")])
|
||||
|
||||
;; Named expander "<code><mode>3" for the smaxmin codes over the
;; VBF_AVX10_2 vector modes (the vectorized BF16 max/min added by this
;; change).  Operands 0 (destination) and 1 must satisfy register_operand;
;; operand 2 is a nonimmediate_operand.  The pattern has no preparation
;; statements, so expansion emits the RTL template exactly as written.
;; It is enabled only when TARGET_AVX10_2_256 holds.
;; NOTE(review): presumably <code> expands to smax/smin and the emitted RTL
;; is matched by the avx10_2_<code>pbf16_<mode> insn that follows -- that
;; insn is only partially visible here, confirm against the full file.
(define_expand "<code><mode>3"
|
||||
[(set (match_operand:VBF_AVX10_2 0 "register_operand")
|
||||
(smaxmin:VBF_AVX10_2
|
||||
(match_operand:VBF_AVX10_2 1 "register_operand")
|
||||
(match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))]
|
||||
"TARGET_AVX10_2_256")
|
||||
|
||||
(define_insn "avx10_2_<code>pbf16_<mode><mask_name>"
|
||||
[(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
|
||||
(smaxmin:VBF_AVX10_2
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx10.2-512 -mprefer-vector-width=512 -Ofast" } */
|
||||
/* { dg-final { scan-assembler-times "vmaxpbf16" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vminpbf16" 1 } } */
|
||||
|
||||
void
|
||||
maxpbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 32; i++)
|
||||
dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
|
||||
}
|
||||
|
||||
void
|
||||
minpbf16_512 (__bf16* dest, __bf16* src1, __bf16* src2)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 32; i++)
|
||||
dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
|
||||
}
|
36
gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c
Normal file
36
gcc/testsuite/gcc.target/i386/avx10_2-bf-vector-smaxmin-1.c
Normal file
|
@ -0,0 +1,36 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx10.2 -Ofast" } */
|
||||
/* { dg-final { scan-assembler-times "vmaxpbf16" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vminpbf16" 2 } } */
|
||||
|
||||
void
|
||||
maxpbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 16; i++)
|
||||
dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
|
||||
}
|
||||
|
||||
void
|
||||
minpbf16_256 (__bf16* dest, __bf16* src1, __bf16* src2)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 16; i++)
|
||||
dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
|
||||
}
|
||||
|
||||
void
|
||||
maxpbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 16; i++)
|
||||
dest[i] = src1[i] > src2[i] ? src1[i] : src2[i];
|
||||
}
|
||||
|
||||
void
|
||||
minpbf16_128 (__bf16* dest, __bf16* src1, __bf16* src2)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < 16; i++)
|
||||
dest[i] = src1[i] < src2[i] ? src1[i] : src2[i];
|
||||
}
|
Loading…
Add table
Reference in a new issue