x86: use VPTERNLOG for further bitwise two-vector operations
All combinations of and, ior, xor, and not involving two operands can be expressed that way in a single insn. gcc/ PR target/93768 * config/i386/i386.cc (ix86_rtx_costs): Further special-case bitwise vector operations. * config/i386/sse.md (*iornot<mode>3): New insn. (*xnor<mode>3): Likewise. (*<nlogic><mode>3): Likewise. (andor): New code iterator. (nlogic): New code attribute. (ternlog_nlogic): Likewise. gcc/testsuite/ PR target/93768 * gcc.target/i386/avx512-binop-not-1.h: New. * gcc.target/i386/avx512-binop-not-2.h: New. * gcc.target/i386/avx512f-orn-si-zmm-1.c: New test. * gcc.target/i386/avx512f-orn-si-zmm-2.c: New test.
This commit is contained in:
parent
450b9566d5
commit
607613e516
6 changed files with 198 additions and 4 deletions
|
@ -21179,6 +21179,32 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
|||
return false;
|
||||
|
||||
case IOR:
|
||||
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||
{
|
||||
/* (ior (not ...) ...) can be a single insn in AVX512. */
|
||||
if (GET_CODE (XEXP (x, 0)) == NOT && TARGET_AVX512F
|
||||
&& (GET_MODE_SIZE (mode) == 64
|
||||
|| (TARGET_AVX512VL
|
||||
&& (GET_MODE_SIZE (mode) == 32
|
||||
|| GET_MODE_SIZE (mode) == 16))))
|
||||
{
|
||||
rtx right = GET_CODE (XEXP (x, 1)) != NOT
|
||||
? XEXP (x, 1) : XEXP (XEXP (x, 1), 0);
|
||||
|
||||
*total = ix86_vec_cost (mode, cost->sse_op)
|
||||
+ rtx_cost (XEXP (XEXP (x, 0), 0), mode,
|
||||
outer_code, opno, speed)
|
||||
+ rtx_cost (right, mode, outer_code, opno, speed);
|
||||
return true;
|
||||
}
|
||||
*total = ix86_vec_cost (mode, cost->sse_op);
|
||||
}
|
||||
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
|
||||
*total = cost->add * 2;
|
||||
else
|
||||
*total = cost->add;
|
||||
return false;
|
||||
|
||||
case XOR:
|
||||
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||
*total = ix86_vec_cost (mode, cost->sse_op);
|
||||
|
@ -21199,11 +21225,20 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
|||
/* pandn is a single instruction. */
|
||||
if (GET_CODE (XEXP (x, 0)) == NOT)
|
||||
{
|
||||
rtx right = XEXP (x, 1);
|
||||
|
||||
/* (and (not ...) (not ...)) can be a single insn in AVX512. */
|
||||
if (GET_CODE (right) == NOT && TARGET_AVX512F
|
||||
&& (GET_MODE_SIZE (mode) == 64
|
||||
|| (TARGET_AVX512VL
|
||||
&& (GET_MODE_SIZE (mode) == 32
|
||||
|| GET_MODE_SIZE (mode) == 16))))
|
||||
right = XEXP (right, 0);
|
||||
|
||||
*total = ix86_vec_cost (mode, cost->sse_op)
|
||||
+ rtx_cost (XEXP (XEXP (x, 0), 0), mode,
|
||||
outer_code, opno, speed)
|
||||
+ rtx_cost (XEXP (x, 1), mode,
|
||||
outer_code, opno, speed);
|
||||
+ rtx_cost (right, mode, outer_code, opno, speed);
|
||||
return true;
|
||||
}
|
||||
else if (GET_CODE (XEXP (x, 1)) == NOT)
|
||||
|
@ -21261,8 +21296,25 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
|
|||
|
||||
case NOT:
|
||||
if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
|
||||
// vnot is pxor -1.
|
||||
*total = ix86_vec_cost (mode, cost->sse_op) + 1;
|
||||
{
|
||||
/* (not (xor ...)) can be a single insn in AVX512. */
|
||||
if (GET_CODE (XEXP (x, 0)) == XOR && TARGET_AVX512F
|
||||
&& (GET_MODE_SIZE (mode) == 64
|
||||
|| (TARGET_AVX512VL
|
||||
&& (GET_MODE_SIZE (mode) == 32
|
||||
|| GET_MODE_SIZE (mode) == 16))))
|
||||
{
|
||||
*total = ix86_vec_cost (mode, cost->sse_op)
|
||||
+ rtx_cost (XEXP (XEXP (x, 0), 0), mode,
|
||||
outer_code, opno, speed)
|
||||
+ rtx_cost (XEXP (XEXP (x, 0), 1), mode,
|
||||
outer_code, opno, speed);
|
||||
return true;
|
||||
}
|
||||
|
||||
// vnot is pxor -1.
|
||||
*total = ix86_vec_cost (mode, cost->sse_op) + 1;
|
||||
}
|
||||
else if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
|
||||
*total = cost->add * 2;
|
||||
else
|
||||
|
|
|
@ -17616,6 +17616,98 @@
|
|||
operands[2] = force_reg (V1TImode, CONSTM1_RTX (V1TImode));
|
||||
})
|
||||
|
||||
(define_insn "*iornot<mode>3"
|
||||
[(set (match_operand:VI 0 "register_operand" "=v,v,v,v")
|
||||
(ior:VI
|
||||
(not:VI
|
||||
(match_operand:VI 1 "bcst_vector_operand" "v,Br,v,m"))
|
||||
(match_operand:VI 2 "bcst_vector_operand" "vBr,v,m,v")))]
|
||||
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
|
||||
|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
|
||||
&& (register_operand (operands[1], <MODE>mode)
|
||||
|| register_operand (operands[2], <MODE>mode))"
|
||||
{
|
||||
if (!register_operand (operands[1], <MODE>mode))
|
||||
{
|
||||
if (TARGET_AVX512VL)
|
||||
return "vpternlog<ternlogsuffix>\t{$0xdd, %1, %2, %0|%0, %2, %1, 0xdd}";
|
||||
return "vpternlog<ternlogsuffix>\t{$0xdd, %g1, %g2, %g0|%g0, %g2, %g1, 0xdd}";
|
||||
}
|
||||
if (TARGET_AVX512VL)
|
||||
return "vpternlog<ternlogsuffix>\t{$0xbb, %2, %1, %0|%0, %1, %2, 0xbb}";
|
||||
return "vpternlog<ternlogsuffix>\t{$0xbb, %g2, %g1, %g0|%g0, %g1, %g2, 0xbb}";
|
||||
}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "evex")
|
||||
(set (attr "mode")
|
||||
(if_then_else (match_test "TARGET_AVX512VL")
|
||||
(const_string "<sseinsnmode>")
|
||||
(const_string "XI")))
|
||||
(set (attr "enabled")
|
||||
(if_then_else (eq_attr "alternative" "2,3")
|
||||
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
|
||||
(const_string "*")))])
|
||||
|
||||
(define_insn "*xnor<mode>3"
|
||||
[(set (match_operand:VI 0 "register_operand" "=v,v")
|
||||
(not:VI
|
||||
(xor:VI
|
||||
(match_operand:VI 1 "bcst_vector_operand" "%v,v")
|
||||
(match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))]
|
||||
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
|
||||
|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
|
||||
&& (register_operand (operands[1], <MODE>mode)
|
||||
|| register_operand (operands[2], <MODE>mode))"
|
||||
{
|
||||
if (TARGET_AVX512VL)
|
||||
return "vpternlog<ternlogsuffix>\t{$0x99, %2, %1, %0|%0, %1, %2, 0x99}";
|
||||
else
|
||||
return "vpternlog<ternlogsuffix>\t{$0x99, %g2, %g1, %g0|%g0, %g1, %g2, 0x99}";
|
||||
}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "evex")
|
||||
(set (attr "mode")
|
||||
(if_then_else (match_test "TARGET_AVX512VL")
|
||||
(const_string "<sseinsnmode>")
|
||||
(const_string "XI")))
|
||||
(set (attr "enabled")
|
||||
(if_then_else (eq_attr "alternative" "1")
|
||||
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
|
||||
(const_string "*")))])
|
||||
|
||||
(define_code_iterator andor [and ior])
|
||||
(define_code_attr nlogic [(and "nor") (ior "nand")])
|
||||
(define_code_attr ternlog_nlogic [(and "0x11") (ior "0x77")])
|
||||
|
||||
(define_insn "*<nlogic><mode>3"
|
||||
[(set (match_operand:VI 0 "register_operand" "=v,v")
|
||||
(andor:VI
|
||||
(not:VI (match_operand:VI 1 "bcst_vector_operand" "%v,v"))
|
||||
(not:VI (match_operand:VI 2 "bcst_vector_operand" "vBr,m"))))]
|
||||
"(<MODE_SIZE> == 64 || TARGET_AVX512VL
|
||||
|| (TARGET_AVX512F && !TARGET_PREFER_AVX256))
|
||||
&& (register_operand (operands[1], <MODE>mode)
|
||||
|| register_operand (operands[2], <MODE>mode))"
|
||||
{
|
||||
if (TARGET_AVX512VL)
|
||||
return "vpternlog<ternlogsuffix>\t{$<ternlog_nlogic>, %2, %1, %0|%0, %1, %2, <ternlog_nlogic>}";
|
||||
else
|
||||
return "vpternlog<ternlogsuffix>\t{$<ternlog_nlogic>, %g2, %g1, %g0|%g0, %g1, %g2, <ternlog_nlogic>}";
|
||||
}
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix" "evex")
|
||||
(set (attr "mode")
|
||||
(if_then_else (match_test "TARGET_AVX512VL")
|
||||
(const_string "<sseinsnmode>")
|
||||
(const_string "XI")))
|
||||
(set (attr "enabled")
|
||||
(if_then_else (eq_attr "alternative" "1")
|
||||
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
|
||||
(const_string "*")))])
|
||||
|
||||
(define_mode_iterator AVX512ZEXTMASK
|
||||
[(DI "TARGET_AVX512BW") (SI "TARGET_AVX512BW") HI])
|
||||
|
||||
|
|
13
gcc/testsuite/gcc.target/i386/avx512-binop-not-1.h
Normal file
13
gcc/testsuite/gcc.target/i386/avx512-binop-not-1.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#include <immintrin.h>
|
||||
|
||||
#define PASTER2(x,y) x##y
|
||||
#define PASTER3(x,y,z) _mm##x##_##y##_##z
|
||||
#define OP(vec, op, suffix) PASTER3 (vec, op, suffix)
|
||||
#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val)
|
||||
|
||||
type
|
||||
foo (type x, SCALAR *f)
|
||||
{
|
||||
return OP (vec, op, suffix) (x, OP (vec, xor, suffix) (DUP (vec, suffix, *f),
|
||||
DUP (vec, suffix, ~0)));
|
||||
}
|
13
gcc/testsuite/gcc.target/i386/avx512-binop-not-2.h
Normal file
13
gcc/testsuite/gcc.target/i386/avx512-binop-not-2.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
#include <immintrin.h>
|
||||
|
||||
#define PASTER2(x,y) x##y
|
||||
#define PASTER3(x,y,z) _mm##x##_##y##_##z
|
||||
#define OP(vec, op, suffix) PASTER3 (vec, op, suffix)
|
||||
#define DUP(vec, suffix, val) PASTER3 (vec, set1, suffix) (val)
|
||||
|
||||
type
|
||||
foo (type x, SCALAR *f)
|
||||
{
|
||||
return OP (vec, op, suffix) (OP (vec, xor, suffix) (x, DUP (vec, suffix, ~0)),
|
||||
DUP (vec, suffix, *f));
|
||||
}
|
12
gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
Normal file
12
gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-1.c
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xdd, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "vpbroadcast" } } */
|
||||
|
||||
#define type __m512i
|
||||
#define vec 512
|
||||
#define op or
|
||||
#define suffix epi32
|
||||
#define SCALAR int
|
||||
|
||||
#include "avx512-binop-not-1.h"
|
12
gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
Normal file
12
gcc/testsuite/gcc.target/i386/avx512f-orn-si-zmm-2.c
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0xbb, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "vpbroadcast" } } */
|
||||
|
||||
#define type __m512i
|
||||
#define vec 512
|
||||
#define op or
|
||||
#define suffix epi32
|
||||
#define SCALAR int
|
||||
|
||||
#include "avx512-binop-not-2.h"
|
Loading…
Add table
Reference in a new issue