x86: use VPTERNLOG also for certain andnot forms

When it's the memory operand which is to be inverted, using VPANDN*
requires a further load instruction. The same can be achieved by a
single VPTERNLOG*. Add two new alternatives (for plain memory and
embedded broadcast), adjusting the predicate for the first operand
accordingly.

Two pre-existing testcases actually end up being affected (improved) by
the change, which is reflected in updated expectations there.

gcc/

	PR target/93768
	* config/i386/sse.md (*andnot<mode>3): Add new alternatives
	for memory form operand 1.

gcc/testsuite/

	PR target/93768
	* gcc.target/i386/avx512f-andn-di-zmm-2.c: New test.
	* gcc.target/i386/avx512f-andn-si-zmm-2.c: Adjust expecations
	towards generated code.
	* gcc.target/i386/pr100711-3.c: Adjust expectations for 32-bit
	code.
This commit is contained in:
Jan Beulich 2023-07-05 09:41:09 +02:00
parent 607613e516
commit 2d11c99dfc
4 changed files with 47 additions and 11 deletions

View file

@ -17210,11 +17210,13 @@
"TARGET_AVX512F")
(define_insn "*andnot<mode>3"
[(set (match_operand:VI 0 "register_operand" "=x,x,v")
[(set (match_operand:VI 0 "register_operand" "=x,x,v,v,v")
(and:VI
(not:VI (match_operand:VI 1 "vector_operand" "0,x,v"))
(match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr")))]
"TARGET_SSE"
(not:VI (match_operand:VI 1 "bcst_vector_operand" "0,x,v,m,Br"))
(match_operand:VI 2 "bcst_vector_operand" "xBm,xm,vmBr,v,v")))]
"TARGET_SSE
&& (register_operand (operands[1], <MODE>mode)
|| register_operand (operands[2], <MODE>mode))"
{
char buf[64];
const char *ops;
@ -17281,6 +17283,15 @@
case 2:
ops = "v%s%s\t{%%2, %%1, %%0|%%0, %%1, %%2}";
break;
case 3:
case 4:
tmp = "pternlog";
ssesuffix = "<ternlogsuffix>";
if (which_alternative != 4 || TARGET_AVX512VL)
ops = "v%s%s\t{$0x44, %%1, %%2, %%0|%%0, %%2, %%1, $0x44}";
else
ops = "v%s%s\t{$0x44, %%g1, %%g2, %%g0|%%g0, %%g2, %%g1, $0x44}";
break;
default:
gcc_unreachable ();
}
@ -17289,7 +17300,7 @@
output_asm_insn (buf, operands);
return "";
}
[(set_attr "isa" "noavx,avx,avx")
[(set_attr "isa" "noavx,avx,avx,*,*")
(set_attr "type" "sselog")
(set (attr "prefix_data16")
(if_then_else
@ -17297,9 +17308,12 @@
(eq_attr "mode" "TI"))
(const_string "1")
(const_string "*")))
(set_attr "prefix" "orig,vex,evex")
(set_attr "prefix" "orig,vex,evex,evex,evex")
(set (attr "mode")
(cond [(match_test "TARGET_AVX2")
(cond [(and (eq_attr "alternative" "3,4")
(match_test "<MODE_SIZE> < 64 && !TARGET_AVX512VL"))
(const_string "XI")
(match_test "TARGET_AVX2")
(const_string "<sseinsnmode>")
(match_test "TARGET_AVX")
(if_then_else
@ -17310,7 +17324,15 @@
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")
]
(const_string "<sseinsnmode>")))])
(const_string "<sseinsnmode>")))
(set (attr "enabled")
(cond [(eq_attr "alternative" "3")
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
(eq_attr "alternative" "4")
(symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL
|| (TARGET_AVX512F && !TARGET_PREFER_AVX256)")
]
(const_string "*")))])
;; PR target/100711: Split notl; vpbroadcastd; vpand as vpbroadcastd; vpandn
(define_split

View file

@ -0,0 +1,12 @@
/* { dg-do compile } */
/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */
/* { dg-final { scan-assembler-times "vpternlogq\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
/* { dg-final { scan-assembler-not "vpbroadcast" } } */
#define type __m512i
#define vec 512
#define op andnot
#define suffix epi64
#define SCALAR long long
#include "avx512-binop-2.h"

View file

@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-mavx512f -O2" } */
/* { dg-final { scan-assembler-times "vpbroadcastd\[^\n\]*%zmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vpandnd\[^\n\]*%zmm\[0-9\]+" 1 } } */
/* { dg-final { scan-assembler-times "vpternlogd\[ \\t\]+\\\$0x44, \\(%(?:eax|rdi|edi)\\)\\\{1to\[1-8\]+\\\}, %zmm\[0-9\]+, %zmm0" 1 } } */
/* { dg-final { scan-assembler-not "vpbroadcast" } } */
#define type __m512i
#define vec 512

View file

@ -37,4 +37,6 @@ v8di foo_v8di (long long a, v8di b)
return (__extension__ (v8di) {~a, ~a, ~a, ~a, ~a, ~a, ~a, ~a}) & b;
}
/* { dg-final { scan-assembler-times "vpandn" 4 } } */
/* { dg-final { scan-assembler-times "vpandn" 4 { target { ! ia32 } } } } */
/* { dg-final { scan-assembler-times "vpandn" 2 { target { ia32 } } } } */
/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$0x44" 2 { target { ia32 } } } } */