diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 27fe73ca65c..e547ee64587 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -70,6 +70,9 @@ extern machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx); extern int avx_vpermilp_parallel (rtx par, machine_mode mode); extern int avx_vperm2f128_parallel (rtx par, machine_mode mode); +extern int vpternlog_redundant_operand_mask (rtx[]); +extern void substitute_vpternlog_operands (rtx[]); + extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx); extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx, bool); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index eabc70011ea..8cd26eb54fa 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -19451,6 +19451,49 @@ avx_vperm2f128_parallel (rtx par, machine_mode mode) return mask + 1; } +/* Return a mask of VPTERNLOG operands that do not affect output. */ + +int +vpternlog_redundant_operand_mask (rtx *operands) +{ + int mask = 0; + int imm8 = XINT (operands[4], 0); + + if (((imm8 >> 4) & 0x0F) == (imm8 & 0x0F)) + mask |= 1; + if (((imm8 >> 2) & 0x33) == (imm8 & 0x33)) + mask |= 2; + if (((imm8 >> 1) & 0x55) == (imm8 & 0x55)) + mask |= 4; + + return mask; +} + +/* Eliminate false dependencies on operands that do not affect output + by substituting other operands of a VPTERNLOG. */ + +void +substitute_vpternlog_operands (rtx *operands) +{ + int mask = vpternlog_redundant_operand_mask (operands); + + if (mask & 1) /* The first operand is redundant. */ + operands[1] = operands[2]; + + if (mask & 2) /* The second operand is redundant. */ + operands[2] = operands[1]; + + if (mask & 4) /* The third operand is redundant. */ + operands[3] = operands[1]; + else if (REG_P (operands[3])) + { + if (mask & 1) + operands[1] = operands[3]; + if (mask & 2) + operands[2] = operands[3]; + } +} + /* Return a register priority for hard reg REGNO. */ static int ix86_register_priority (int hard_regno) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index f1712b001f9..7e2aa3f995c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -12695,6 +12695,48 @@ (symbol_ref " == 64 || TARGET_AVX512VL") (const_string "*")))]) +;; When VPTERNLOG happens to be invariant w.r.t first and second operands, +;; and the third operand is memory, eliminate false dependencies by loading +;; memory into the output operand first. +(define_split + [(set (match_operand:V 0 "register_operand") + (unspec:V + [(match_operand:V 1 "register_operand") + (match_operand:V 2 "register_operand") + (match_operand:V 3 "memory_operand") + (match_operand:SI 4 "const_0_to_255_operand")] + UNSPEC_VTERNLOG))] + "!reload_completed && vpternlog_redundant_operand_mask (operands) == 3" + [(set (match_dup 0) + (match_dup 3)) + (set (match_dup 0) + (unspec:V + [(match_dup 0) + (match_dup 0) + (match_dup 0) + (match_dup 4)] + UNSPEC_VTERNLOG))]) + +;; Eliminate false dependencies when VPTERNLOG is invariant w.r.t any +;; of input operands (except the case handled in the above split). +(define_split + [(set (match_operand:V 0 "register_operand") + (unspec:V + [(match_operand:V 1 "register_operand") + (match_operand:V 2 "register_operand") + (match_operand:V 3 "nonimmediate_operand") + (match_operand:SI 4 "const_0_to_255_operand")] + UNSPEC_VTERNLOG))] + "!reload_completed && vpternlog_redundant_operand_mask (operands) != 0" + [(set (match_dup 0) + (unspec:V + [(match_dup 1) + (match_dup 2) + (match_dup 3) + (match_dup 4)] + UNSPEC_VTERNLOG))] + "substitute_vpternlog_operands (operands);") + ;; There must be lots of other combinations like ;; ;; (any_logic:V diff --git a/gcc/testsuite/gcc.target/i386/invariant-ternlog-1.c b/gcc/testsuite/gcc.target/i386/invariant-ternlog-1.c new file mode 100644 index 00000000000..21051c6bba0 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/invariant-ternlog-1.c @@ -0,0 +1,21 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqa" 4 } } */ +/* { dg-final { scan-assembler-times {vpternlog[^\n\r]*\(%rdx\)} 2 } } */ + +#include + +__m512i f(__m512i* a, __m512i* b, __m512i* c) +{ + return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_B | ~_MM_TERNLOG_C); +} + +__m512i g(__m512i* a, __m512i* b, __m512i* c) +{ + return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_A | ~_MM_TERNLOG_C); +} + +__m512i h(__m512i* a, __m512i* b, __m512i* c) +{ + return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_A | ~_MM_TERNLOG_B); +} diff --git a/gcc/testsuite/gcc.target/i386/invariant-ternlog-2.c b/gcc/testsuite/gcc.target/i386/invariant-ternlog-2.c new file mode 100644 index 00000000000..d70bbb02390 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/invariant-ternlog-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -O2" } */ +/* { dg-final { scan-assembler-times "vmovdqa" 1 } } */ +/* { dg-final { scan-assembler "vpternlog.*zmm0.*zmm0.*zmm0" } } */ + +#include + +__m512i f(__m512i* a, __m512i* b, __m512i* c) +{ + return _mm512_ternarylogic_epi64 (a[0], b[0], c[0], ~_MM_TERNLOG_C); +} +