Check avx upper register for parallel.

For function arguments/return, when it's BLK mode, it's put in a
parallel with an expr_list, and the expr_list contains the real mode
and registers.
Current ix86_check_avx_upper_register only checked for SSE_REG_P, and
failed to handle that. The patch extend the handle to each subrtx.

gcc/ChangeLog:

	PR target/116512
	* config/i386/i386.cc (ix86_check_avx_upper_register): Iterate
	subrtx to scan for avx upper register.
	(ix86_check_avx_upper_stores): Inline old
	ix86_check_avx_upper_register.
	(ix86_avx_u128_mode_needed): Ditto, and replace
	FOR_EACH_SUBRTX with call to new
	ix86_check_avx_upper_register.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr116512.c: New test.
This commit is contained in:
liuhongt 2024-08-29 11:39:20 +08:00
parent 350d627d5e
commit ab214ef734
2 changed files with 49 additions and 13 deletions

View file

@ -14882,9 +14882,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn)
static bool
ix86_check_avx_upper_register (const_rtx exp)
{
return (SSE_REG_P (exp)
&& !EXT_REX_SSE_REG_P (exp)
&& GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
/* construct_container may return a parallel with expr_list
which contains the real reg and mode */
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
{
const_rtx x = *iter;
if (SSE_REG_P (x)
&& !EXT_REX_SSE_REG_P (x)
&& GET_MODE_BITSIZE (GET_MODE (x)) > 128)
return true;
}
return false;
}
/* Check if a 256bit or 512bit AVX register is referenced in stores. */
@ -14892,7 +14902,9 @@ ix86_check_avx_upper_register (const_rtx exp)
static void
ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
{
if (ix86_check_avx_upper_register (dest))
if (SSE_REG_P (dest)
&& !EXT_REX_SSE_REG_P (dest)
&& GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
{
bool *used = (bool *) data;
*used = true;
@ -14951,14 +14963,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
return AVX_U128_CLEAN;
}
subrtx_iterator::array_type array;
rtx set = single_set (insn);
if (set)
{
rtx dest = SET_DEST (set);
rtx src = SET_SRC (set);
if (ix86_check_avx_upper_register (dest))
if (SSE_REG_P (dest)
&& !EXT_REX_SSE_REG_P (dest)
&& GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
{
/* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
source isn't zero. */
@ -14969,9 +14981,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
}
else
{
FOR_EACH_SUBRTX (iter, array, src, NONCONST)
if (ix86_check_avx_upper_register (*iter))
return AVX_U128_DIRTY;
if (ix86_check_avx_upper_register (src))
return AVX_U128_DIRTY;
}
/* This isn't YMM/ZMM load/store. */
@ -14982,9 +14993,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
Hardware changes state only when a 256bit register is written to,
but we need to prevent the compiler from moving optimal insertion
point above eventual read from 256bit or 512 bit register. */
FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
if (ix86_check_avx_upper_register (*iter))
return AVX_U128_DIRTY;
if (ix86_check_avx_upper_register (PATTERN (insn)))
return AVX_U128_DIRTY;
return AVX_U128_ANY;
}

View file

@ -0,0 +1,26 @@
/* { dg-do compile } */
/* { dg-options "-march=x86-64-v4 -O2" } */
/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
#include <immintrin.h>
struct B {
union {
__m512 f;
__m512i s;
};
};
struct B foo(int n) {
struct B res;
res.s = _mm512_set1_epi32(n);
return res;
}
__m512i bar(int n) {
struct B res;
res.s = _mm512_set1_epi32(n);
return res.s;
}