i386: eliminate redundant operands of VPTERNLOG
As mentioned in PR 110202, GCC may be presented with input where control word of the VPTERNLOG intrinsic implies that some of its operands do not affect the result. In that case, we can eliminate redundant operands of the instruction by substituting any other operand in their place. This removes false dependencies. For instance, instead of (252 = 0xfc = _MM_TERNLOG_A | _MM_TERNLOG_B) vpternlogq $252, %zmm2, %zmm1, %zmm0 emit vpternlogq $252, %zmm0, %zmm1, %zmm0 When VPTERNLOG is invariant w.r.t first and second operands, and the third operand is memory, load memory into the output operand first, i.e. instead of (85 = 0x55 = ~_MM_TERNLOG_C) vpternlogq $85, (%rdi), %zmm1, %zmm0 emit vmovdqa64 (%rdi), %zmm0 vpternlogq $85, %zmm0, %zmm0, %zmm0 gcc/ChangeLog: PR target/110202 * config/i386/i386-protos.h (vpternlog_redundant_operand_mask): Declare. (substitute_vpternlog_operands): Declare. * config/i386/i386.cc (vpternlog_redundant_operand_mask): New helper. (substitute_vpternlog_operands): New function. Use them... * config/i386/sse.md: ... here in new VPTERNLOG define_splits. gcc/testsuite/ChangeLog: PR target/110202 * gcc.target/i386/invariant-ternlog-1.c: New test. * gcc.target/i386/invariant-ternlog-2.c: New test.
This commit is contained in:
parent
c572f09a75
commit
567d06bb35
5 changed files with 121 additions and 0 deletions
|
@ -70,6 +70,9 @@ extern machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx);
|
|||
extern int avx_vpermilp_parallel (rtx par, machine_mode mode);
|
||||
extern int avx_vperm2f128_parallel (rtx par, machine_mode mode);
|
||||
|
||||
extern int vpternlog_redundant_operand_mask (rtx[]);
|
||||
extern void substitute_vpternlog_operands (rtx[]);
|
||||
|
||||
extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
|
||||
extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx,
|
||||
rtx, rtx, rtx, rtx, bool);
|
||||
|
|
|
@ -19451,6 +19451,49 @@ avx_vperm2f128_parallel (rtx par, machine_mode mode)
|
|||
return mask + 1;
|
||||
}
|
||||
|
||||
/* Return a mask of VPTERNLOG operands that do not affect output. */
|
||||
|
||||
int
|
||||
vpternlog_redundant_operand_mask (rtx *operands)
|
||||
{
|
||||
int mask = 0;
|
||||
int imm8 = XINT (operands[4], 0);
|
||||
|
||||
if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F))
|
||||
mask |= 1;
|
||||
if (((imm8 >> 2) & 0x33) == (imm8 & 0x33))
|
||||
mask |= 2;
|
||||
if (((imm8 >> 1) & 0x55) == (imm8 & 0x55))
|
||||
mask |= 4;
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* Eliminate false dependencies on operands that do not affect output
|
||||
by substituting other operands of a VPTERNLOG. */
|
||||
|
||||
void
|
||||
substitute_vpternlog_operands (rtx *operands)
|
||||
{
|
||||
int mask = vpternlog_redundant_operand_mask (operands);
|
||||
|
||||
if (mask & 1) /* The first operand is redundant. */
|
||||
operands[1] = operands[2];
|
||||
|
||||
if (mask & 2) /* The second operand is redundant. */
|
||||
operands[2] = operands[1];
|
||||
|
||||
if (mask & 4) /* The third operand is redundant. */
|
||||
operands[3] = operands[1];
|
||||
else if (REG_P (operands[3]))
|
||||
{
|
||||
if (mask & 1)
|
||||
operands[1] = operands[3];
|
||||
if (mask & 2)
|
||||
operands[2] = operands[3];
|
||||
}
|
||||
}
|
||||
|
||||
/* Return a register priority for hard reg REGNO. */
|
||||
static int
|
||||
ix86_register_priority (int hard_regno)
|
||||
|
|
|
@ -12695,6 +12695,48 @@
|
|||
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
|
||||
(const_string "*")))])
|
||||
|
||||
;; When VPTERNLOG happens to be invariant w.r.t first and second operands,
|
||||
;; and the third operand is memory, eliminate false dependencies by loading
|
||||
;; memory into the output operand first.
|
||||
(define_split
|
||||
[(set (match_operand:V 0 "register_operand")
|
||||
(unspec:V
|
||||
[(match_operand:V 1 "register_operand")
|
||||
(match_operand:V 2 "register_operand")
|
||||
(match_operand:V 3 "memory_operand")
|
||||
(match_operand:SI 4 "const_0_to_255_operand")]
|
||||
UNSPEC_VTERNLOG))]
|
||||
"!reload_completed && vpternlog_redundant_operand_mask (operands) == 3"
|
||||
[(set (match_dup 0)
|
||||
(match_dup 3))
|
||||
(set (match_dup 0)
|
||||
(unspec:V
|
||||
[(match_dup 0)
|
||||
(match_dup 0)
|
||||
(match_dup 0)
|
||||
(match_dup 4)]
|
||||
UNSPEC_VTERNLOG))])
|
||||
|
||||
;; Eliminate false dependencies when VPTERNLOG is invariant w.r.t any
|
||||
;; of input operands (except the case handled in the above split).
|
||||
(define_split
|
||||
[(set (match_operand:V 0 "register_operand")
|
||||
(unspec:V
|
||||
[(match_operand:V 1 "register_operand")
|
||||
(match_operand:V 2 "register_operand")
|
||||
(match_operand:V 3 "nonimmediate_operand")
|
||||
(match_operand:SI 4 "const_0_to_255_operand")]
|
||||
UNSPEC_VTERNLOG))]
|
||||
"!reload_completed && vpternlog_redundant_operand_mask (operands) != 0"
|
||||
[(set (match_dup 0)
|
||||
(unspec:V
|
||||
[(match_dup 1)
|
||||
(match_dup 2)
|
||||
(match_dup 3)
|
||||
(match_dup 4)]
|
||||
UNSPEC_VTERNLOG))]
|
||||
"substitute_vpternlog_operands (operands);")
|
||||
|
||||
;; There must be lots of other combinations like
|
||||
;;
|
||||
;; (any_logic:V
|
||||
|
|
21
gcc/testsuite/gcc.target/i386/invariant-ternlog-1.c
Normal file
21
gcc/testsuite/gcc.target/i386/invariant-ternlog-1.c
Normal file
|
@ -0,0 +1,21 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512f -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */
|
||||
/* { dg-final { scan-assembler-times {vpternlog[^\n\r]*\(%rdx\)} 2 } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m512i f(__m512i* a, __m512i* b, __m512i* c)
|
||||
{
|
||||
return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_B | ~_MM_TERNLOG_C);
|
||||
}
|
||||
|
||||
__m512i g(__m512i* a, __m512i* b, __m512i* c)
|
||||
{
|
||||
return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_A | ~_MM_TERNLOG_C);
|
||||
}
|
||||
|
||||
__m512i h(__m512i* a, __m512i* b, __m512i* c)
|
||||
{
|
||||
return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_A | ~_MM_TERNLOG_B);
|
||||
}
|
12
gcc/testsuite/gcc.target/i386/invariant-ternlog-2.c
Normal file
12
gcc/testsuite/gcc.target/i386/invariant-ternlog-2.c
Normal file
|
@ -0,0 +1,12 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavx512f -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vmovdqa" 1 } } */
|
||||
/* { dg-final { scan-assembler "vpternlog.*zmm0.*zmm0.*zmm0" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m512i f(__m512i* a, __m512i* b, __m512i* c)
|
||||
{
|
||||
return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_C);
|
||||
}
|
||||
|
Loading…
Add table
Reference in a new issue