From ab214ef734bfc3dcffcf79ff9e1dd651c2b40566 Mon Sep 17 00:00:00 2001 From: liuhongt Date: Thu, 29 Aug 2024 11:39:20 +0800 Subject: [PATCH] Check avx upper register for parallel. For function arguments/return, when it's BLK mode, it's put in a parallel with an expr_list, and the expr_list contains the real mode and registers. Current ix86_check_avx_upper_register only checked for SSE_REG_P, and failed to handle that. The patch extend the handle to each subrtx. gcc/ChangeLog: PR target/116512 * config/i386/i386.cc (ix86_check_avx_upper_register): Iterate subrtx to scan for avx upper register. (ix86_check_avx_upper_stores): Inline old ix86_check_avx_upper_register. (ix86_avx_u128_mode_needed): Ditto, and replace FOR_EACH_SUBRTX with call to new ix86_check_avx_upper_register. gcc/testsuite/ChangeLog: * gcc.target/i386/pr116512.c: New test. --- gcc/config/i386/i386.cc | 36 +++++++++++++++--------- gcc/testsuite/gcc.target/i386/pr116512.c | 26 +++++++++++++++++ 2 files changed, 49 insertions(+), 13 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr116512.c diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index a1f65d41fdd..546c964d2a4 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -14882,9 +14882,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn) static bool ix86_check_avx_upper_register (const_rtx exp) { - return (SSE_REG_P (exp) - && !EXT_REX_SSE_REG_P (exp) - && GET_MODE_BITSIZE (GET_MODE (exp)) > 128); + /* construct_container may return a parallel with expr_list + which contains the real reg and mode */ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, exp, NONCONST) + { + const_rtx x = *iter; + if (SSE_REG_P (x) + && !EXT_REX_SSE_REG_P (x) + && GET_MODE_BITSIZE (GET_MODE (x)) > 128) + return true; + } + + return false; } /* Check if a 256bit or 512bit AVX register is referenced in stores. */ @@ -14892,7 +14902,9 @@ ix86_check_avx_upper_register (const_rtx exp) static void ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) { - if (ix86_check_avx_upper_register (dest)) + if (SSE_REG_P (dest) + && !EXT_REX_SSE_REG_P (dest) + && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) { bool *used = (bool *) data; *used = true; @@ -14951,14 +14963,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) return AVX_U128_CLEAN; } - subrtx_iterator::array_type array; - rtx set = single_set (insn); if (set) { rtx dest = SET_DEST (set); rtx src = SET_SRC (set); - if (ix86_check_avx_upper_register (dest)) + if (SSE_REG_P (dest) + && !EXT_REX_SSE_REG_P (dest) + && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) { /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the source isn't zero. */ @@ -14969,9 +14981,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) } else { - FOR_EACH_SUBRTX (iter, array, src, NONCONST) - if (ix86_check_avx_upper_register (*iter)) - return AVX_U128_DIRTY; + if (ix86_check_avx_upper_register (src)) + return AVX_U128_DIRTY; } /* This isn't YMM/ZMM load/store. */ @@ -14982,9 +14993,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) Hardware changes state only when a 256bit register is written to, but we need to prevent the compiler from moving optimal insertion point above eventual read from 256bit or 512 bit register. */ - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) - if (ix86_check_avx_upper_register (*iter)) - return AVX_U128_DIRTY; + if (ix86_check_avx_upper_register (PATTERN (insn))) + return AVX_U128_DIRTY; return AVX_U128_ANY; } diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c new file mode 100644 index 00000000000..c2bc6c91b64 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr116512.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-options "-march=x86-64-v4 -O2" } */ +/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ + +#include + +struct B { + union { + __m512 f; + __m512i s; + }; +}; + +struct B foo(int n) { + struct B res; + res.s = _mm512_set1_epi32(n); + + return res; +} + +__m512i bar(int n) { + struct B res; + res.s = _mm512_set1_epi32(n); + + return res.s; +}