i386: Auto vectorize sdot_prod, usdot_prod, udot_prod with AVX10.2 instructions
gcc/ChangeLog:

	* config/i386/sse.md (VI1_AVX512VNNIBW): New.
	(VI2_AVX10_2): Ditto.
	(sdot_prod<mode>): Add AVX10.2 to auto vectorize and combine 512 bit part.
	(udot_prod<mode>): Ditto.
	(sdot_prodv64qi): Removed.
	(udot_prodv64qi): Ditto.
	(usdot_prod<mode>): Add AVX10.2 to auto vectorize.
	(udot_prod<mode>): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/vnniint16-auto-vectorize-2.c: Only define TEST when not defined.
	* gcc.target/i386/vnniint8-auto-vectorize-2.c: Ditto.
	* gcc.target/i386/vnniint16-auto-vectorize-3.c: New test.
	* gcc.target/i386/vnniint16-auto-vectorize-4.c: Ditto.
	* gcc.target/i386/vnniint8-auto-vectorize-3.c: Ditto.
	* gcc.target/i386/vnniint8-auto-vectorize-4.c: Ditto.
This commit is contained in:
parent
5239902210
commit
b1f9fbb6da
7 changed files with 88 additions and 80 deletions
|
@ -610,6 +610,10 @@
|
|||
(define_mode_iterator VI1_AVX512VNNI
|
||||
[(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
|
||||
|
||||
(define_mode_iterator VI1_AVX512VNNIBW
|
||||
[(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
|
||||
(V32QI "TARGET_AVX2") V16QI])
|
||||
|
||||
(define_mode_iterator VI12_256_512_AVX512VL
|
||||
[(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL")
|
||||
(V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")])
|
||||
|
@ -627,6 +631,9 @@
|
|||
[(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
|
||||
(V16HI "TARGET_AVX2") V8HI])
|
||||
|
||||
(define_mode_iterator VI2_AVX10_2
|
||||
[(V32HI "TARGET_AVX10_2_512") V16HI V8HI])
|
||||
|
||||
(define_mode_iterator VI4_AVX
|
||||
[(V8SI "TARGET_AVX") V4SI])
|
||||
|
||||
|
@ -31232,12 +31239,13 @@
|
|||
|
||||
(define_expand "sdot_prod<mode>"
|
||||
[(match_operand:<ssedvecmode> 0 "register_operand")
|
||||
(match_operand:VI1_AVX2 1 "register_operand")
|
||||
(match_operand:VI1_AVX2 2 "register_operand")
|
||||
(match_operand:VI1_AVX512VNNIBW 1 "register_operand")
|
||||
(match_operand:VI1_AVX512VNNIBW 2 "register_operand")
|
||||
(match_operand:<ssedvecmode> 3 "register_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (TARGET_AVXVNNIINT8)
|
||||
if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512)
|
||||
|| (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256)))
|
||||
{
|
||||
operands[1] = lowpart_subreg (<ssedvecmode>mode,
|
||||
force_reg (<MODE>mode, operands[1]),
|
||||
|
@ -31276,44 +31284,15 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "sdot_prodv64qi"
|
||||
[(match_operand:V16SI 0 "register_operand")
|
||||
(match_operand:V64QI 1 "register_operand")
|
||||
(match_operand:V64QI 2 "register_operand")
|
||||
(match_operand:V16SI 3 "register_operand")]
|
||||
"(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512"
|
||||
{
|
||||
/* Emulate with vpdpwssd. */
|
||||
rtx op1_lo = gen_reg_rtx (V32HImode);
|
||||
rtx op1_hi = gen_reg_rtx (V32HImode);
|
||||
rtx op2_lo = gen_reg_rtx (V32HImode);
|
||||
rtx op2_hi = gen_reg_rtx (V32HImode);
|
||||
|
||||
emit_insn (gen_vec_unpacks_lo_v64qi (op1_lo, operands[1]));
|
||||
emit_insn (gen_vec_unpacks_lo_v64qi (op2_lo, operands[2]));
|
||||
emit_insn (gen_vec_unpacks_hi_v64qi (op1_hi, operands[1]));
|
||||
emit_insn (gen_vec_unpacks_hi_v64qi (op2_hi, operands[2]));
|
||||
|
||||
rtx res1 = gen_reg_rtx (V16SImode);
|
||||
rtx res2 = gen_reg_rtx (V16SImode);
|
||||
rtx sum = gen_reg_rtx (V16SImode);
|
||||
|
||||
emit_move_insn (sum, CONST0_RTX (V16SImode));
|
||||
emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
|
||||
emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
|
||||
|
||||
emit_insn (gen_addv16si3 (operands[0], res1, res2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "udot_prod<mode>"
|
||||
[(match_operand:<ssedvecmode> 0 "register_operand")
|
||||
(match_operand:VI1_AVX2 1 "register_operand")
|
||||
(match_operand:VI1_AVX2 2 "register_operand")
|
||||
(match_operand:VI1_AVX512VNNIBW 1 "register_operand")
|
||||
(match_operand:VI1_AVX512VNNIBW 2 "register_operand")
|
||||
(match_operand:<ssedvecmode> 3 "register_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (TARGET_AVXVNNIINT8)
|
||||
if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512)
|
||||
|| (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256)))
|
||||
{
|
||||
operands[1] = lowpart_subreg (<ssedvecmode>mode,
|
||||
force_reg (<MODE>mode, operands[1]),
|
||||
|
@ -31352,36 +31331,6 @@
|
|||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "udot_prodv64qi"
|
||||
[(match_operand:V16SI 0 "register_operand")
|
||||
(match_operand:V64QI 1 "register_operand")
|
||||
(match_operand:V64QI 2 "register_operand")
|
||||
(match_operand:V16SI 3 "register_operand")]
|
||||
"(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512"
|
||||
{
|
||||
/* Emulate with vpdpwssd. */
|
||||
rtx op1_lo = gen_reg_rtx (V32HImode);
|
||||
rtx op1_hi = gen_reg_rtx (V32HImode);
|
||||
rtx op2_lo = gen_reg_rtx (V32HImode);
|
||||
rtx op2_hi = gen_reg_rtx (V32HImode);
|
||||
|
||||
emit_insn (gen_vec_unpacku_lo_v64qi (op1_lo, operands[1]));
|
||||
emit_insn (gen_vec_unpacku_lo_v64qi (op2_lo, operands[2]));
|
||||
emit_insn (gen_vec_unpacku_hi_v64qi (op1_hi, operands[1]));
|
||||
emit_insn (gen_vec_unpacku_hi_v64qi (op2_hi, operands[2]));
|
||||
|
||||
rtx res1 = gen_reg_rtx (V16SImode);
|
||||
rtx res2 = gen_reg_rtx (V16SImode);
|
||||
rtx sum = gen_reg_rtx (V16SImode);
|
||||
|
||||
emit_move_insn (sum, CONST0_RTX (V16SImode));
|
||||
emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
|
||||
emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
|
||||
|
||||
emit_insn (gen_addv16si3 (operands[0], res1, res2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "vpdp<vpdotprodtype>_<mode>"
|
||||
[(set (match_operand:VI4_AVX 0 "register_operand" "=v")
|
||||
(unspec:VI4_AVX
|
||||
|
@ -31757,10 +31706,10 @@
|
|||
|
||||
(define_expand "usdot_prod<mode>"
|
||||
[(match_operand:<sseunpackmode> 0 "register_operand")
|
||||
(match_operand:VI2_AVX2 1 "register_operand")
|
||||
(match_operand:VI2_AVX2 2 "register_operand")
|
||||
(match_operand:VI2_AVX10_2 1 "register_operand")
|
||||
(match_operand:VI2_AVX10_2 2 "register_operand")
|
||||
(match_operand:<sseunpackmode> 3 "register_operand")]
|
||||
"TARGET_AVXVNNIINT16"
|
||||
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256"
|
||||
{
|
||||
operands[1] = lowpart_subreg (<sseunpackmode>mode,
|
||||
force_reg (<MODE>mode, operands[1]),
|
||||
|
@ -31775,10 +31724,10 @@
|
|||
|
||||
(define_expand "udot_prod<mode>"
|
||||
[(match_operand:<sseunpackmode> 0 "register_operand")
|
||||
(match_operand:VI2_AVX2 1 "register_operand")
|
||||
(match_operand:VI2_AVX2 2 "register_operand")
|
||||
(match_operand:VI2_AVX10_2 1 "register_operand")
|
||||
(match_operand:VI2_AVX10_2 2 "register_operand")
|
||||
(match_operand:<sseunpackmode> 3 "register_operand")]
|
||||
"TARGET_AVXVNNIINT16"
|
||||
"TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256"
|
||||
{
|
||||
operands[1] = lowpart_subreg (<sseunpackmode>mode,
|
||||
force_reg (<MODE>mode, operands[1]),
|
||||
|
|
|
@ -2,19 +2,24 @@
|
|||
/* { dg-options "-O2 -mavxvnniint16" } */
|
||||
/* { dg-require-effective-target avxvnniint16 } */
|
||||
|
||||
#ifndef AVX10_2
|
||||
#define AVXVNNIINT16
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
#include "vnniint16-auto-vectorize-1.c"
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#ifndef N
|
||||
#define N 256
|
||||
#endif
|
||||
|
||||
short a_i16[N];
|
||||
unsigned short b_u16[N], c_u16[N], d_u16[N];
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx10.2 -O2" } */
|
||||
/* { dg-final { scan-assembler "vpdpwusd\t" } } */
|
||||
/* { dg-final { scan-assembler "vpdpwuud\t" } } */
|
||||
|
||||
#include "vnniint16-auto-vectorize-1.c"
|
18
gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-4.c
Normal file
18
gcc/testsuite/gcc.target/i386/vnniint16-auto-vectorize-4.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavx10.2-512" } */
|
||||
/* { dg-require-effective-target avx10_2_512 } */
|
||||
|
||||
#define N 512
|
||||
|
||||
#define AVX10_2
|
||||
#define AVX10_2_512
|
||||
#define AVX10_512BIT
|
||||
#define AVX512F_LEN 512
|
||||
|
||||
#define TEST test_512
|
||||
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx10-check.h"
|
||||
#endif
|
||||
|
||||
#include "vnniint16-auto-vectorize-2.c"
|
|
@ -2,19 +2,25 @@
|
|||
/* { dg-options "-O2 -mavxvnniint8" } */
|
||||
/* { dg-require-effective-target avxvnniint8 } */
|
||||
|
||||
#ifndef AVX10_2
|
||||
#define AVXVNNIINT8
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
#include "vnniint8-auto-vectorize-1.c"
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#ifndef N
|
||||
#define N 256
|
||||
#endif
|
||||
|
||||
char a_i8[N], b_i8[N];
|
||||
unsigned char c_u8[N], d_u8[N];
|
||||
int i8_exp, i8_ref;
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx10.2 -O2" } */
|
||||
/* { dg-final { scan-assembler "vpdpbssd\t" } } */
|
||||
/* { dg-final { scan-assembler "vpdpbuud\t" } } */
|
||||
|
||||
#include "vnniint8-auto-vectorize-1.c"
|
18
gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-4.c
Normal file
18
gcc/testsuite/gcc.target/i386/vnniint8-auto-vectorize-4.c
Normal file
|
@ -0,0 +1,18 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavx10.2-512" } */
|
||||
/* { dg-require-effective-target avx10_2_512 } */
|
||||
|
||||
#define N 512
|
||||
|
||||
#define AVX10_2
|
||||
#define AVX10_2_512
|
||||
#define AVX10_512BIT
|
||||
#define AVX512F_LEN 512
|
||||
|
||||
#define TEST test_512
|
||||
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx10-check.h"
|
||||
#endif
|
||||
|
||||
#include "vnniint8-auto-vectorize-2.c"
|
Loading…
Add table
Reference in a new issue