Check avx upper register for parallel.
For function arguments/return, when it's BLK mode, it's put in a parallel with an expr_list, and the expr_list contains the real mode and registers. Current ix86_check_avx_upper_register only checked for SSE_REG_P, and failed to handle that. The patch extend the handle to each subrtx. gcc/ChangeLog: PR target/116512 * config/i386/i386.cc (ix86_check_avx_upper_register): Iterate subrtx to scan for avx upper register. (ix86_check_avx_upper_stores): Inline old ix86_check_avx_upper_register. (ix86_avx_u128_mode_needed): Ditto, and replace FOR_EACH_SUBRTX with call to new ix86_check_avx_upper_register. gcc/testsuite/ChangeLog: * gcc.target/i386/pr116512.c: New test.
This commit is contained in:
parent
350d627d5e
commit
ab214ef734
2 changed files with 49 additions and 13 deletions
|
@ -14882,9 +14882,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn)
|
|||
static bool
|
||||
ix86_check_avx_upper_register (const_rtx exp)
|
||||
{
|
||||
return (SSE_REG_P (exp)
|
||||
&& !EXT_REX_SSE_REG_P (exp)
|
||||
&& GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
|
||||
/* construct_container may return a parallel with expr_list
|
||||
which contains the real reg and mode */
|
||||
subrtx_iterator::array_type array;
|
||||
FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
|
||||
{
|
||||
const_rtx x = *iter;
|
||||
if (SSE_REG_P (x)
|
||||
&& !EXT_REX_SSE_REG_P (x)
|
||||
&& GET_MODE_BITSIZE (GET_MODE (x)) > 128)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Check if a 256bit or 512bit AVX register is referenced in stores. */
|
||||
|
@ -14892,7 +14902,9 @@ ix86_check_avx_upper_register (const_rtx exp)
|
|||
static void
|
||||
ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
|
||||
{
|
||||
if (ix86_check_avx_upper_register (dest))
|
||||
if (SSE_REG_P (dest)
|
||||
&& !EXT_REX_SSE_REG_P (dest)
|
||||
&& GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
|
||||
{
|
||||
bool *used = (bool *) data;
|
||||
*used = true;
|
||||
|
@ -14951,14 +14963,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
|
|||
return AVX_U128_CLEAN;
|
||||
}
|
||||
|
||||
subrtx_iterator::array_type array;
|
||||
|
||||
rtx set = single_set (insn);
|
||||
if (set)
|
||||
{
|
||||
rtx dest = SET_DEST (set);
|
||||
rtx src = SET_SRC (set);
|
||||
if (ix86_check_avx_upper_register (dest))
|
||||
if (SSE_REG_P (dest)
|
||||
&& !EXT_REX_SSE_REG_P (dest)
|
||||
&& GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
|
||||
{
|
||||
/* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
|
||||
source isn't zero. */
|
||||
|
@ -14969,9 +14981,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
|
|||
}
|
||||
else
|
||||
{
|
||||
FOR_EACH_SUBRTX (iter, array, src, NONCONST)
|
||||
if (ix86_check_avx_upper_register (*iter))
|
||||
return AVX_U128_DIRTY;
|
||||
if (ix86_check_avx_upper_register (src))
|
||||
return AVX_U128_DIRTY;
|
||||
}
|
||||
|
||||
/* This isn't YMM/ZMM load/store. */
|
||||
|
@ -14982,9 +14993,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
|
|||
Hardware changes state only when a 256bit register is written to,
|
||||
but we need to prevent the compiler from moving optimal insertion
|
||||
point above eventual read from 256bit or 512 bit register. */
|
||||
FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
|
||||
if (ix86_check_avx_upper_register (*iter))
|
||||
return AVX_U128_DIRTY;
|
||||
if (ix86_check_avx_upper_register (PATTERN (insn)))
|
||||
return AVX_U128_DIRTY;
|
||||
|
||||
return AVX_U128_ANY;
|
||||
}
|
||||
|
|
26
gcc/testsuite/gcc.target/i386/pr116512.c
Normal file
26
gcc/testsuite/gcc.target/i386/pr116512.c
Normal file
|
@ -0,0 +1,26 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-march=x86-64-v4 -O2" } */
|
||||
/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
struct B {
|
||||
union {
|
||||
__m512 f;
|
||||
__m512i s;
|
||||
};
|
||||
};
|
||||
|
||||
struct B foo(int n) {
|
||||
struct B res;
|
||||
res.s = _mm512_set1_epi32(n);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
__m512i bar(int n) {
|
||||
struct B res;
|
||||
res.s = _mm512_set1_epi32(n);
|
||||
|
||||
return res.s;
|
||||
}
|
Loading…
Add table
Reference in a new issue