diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index a45640f9a96..0b887e74515 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -8661,6 +8661,8 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 128) move_mode = TImode; + if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (move_mode) > 256) + move_mode = OImode; /* Find the corresponding vector mode with the same size as MOVE_MODE. MOVE_MODE is an integer mode at the moment (SI, DI, TI, etc.). */ diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 25f21ac2a49..f73af2eb477 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -2983,6 +2983,8 @@ ix86_option_override_internal (bool main_args_p, } if (ix86_tune_features [X86_TUNE_AVOID_256FMA_CHAINS]) + SET_OPTION_IF_UNSET (opts, opts_set, param_avoid_fma_max_bits, 512); + else if (ix86_tune_features [X86_TUNE_AVOID_256FMA_CHAINS]) SET_OPTION_IF_UNSET (opts, opts_set, param_avoid_fma_max_bits, 256); else if (ix86_tune_features [X86_TUNE_AVOID_128FMA_CHAINS]) SET_OPTION_IF_UNSET (opts, opts_set, param_avoid_fma_max_bits, 128); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 518cc9ffd1f..de978d19063 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -20363,10 +20363,13 @@ ix86_vec_cost (machine_mode mode, int cost) if (GET_MODE_BITSIZE (mode) == 128 && TARGET_SSE_SPLIT_REGS) - return cost * 2; - if (GET_MODE_BITSIZE (mode) > 128 + return cost * GET_MODE_BITSIZE (mode) / 64; + else if (GET_MODE_BITSIZE (mode) > 128 && TARGET_AVX256_SPLIT_REGS) return cost * GET_MODE_BITSIZE (mode) / 128; + else if (GET_MODE_BITSIZE (mode) > 256 + && TARGET_AVX512_SPLIT_REGS) + return cost * GET_MODE_BITSIZE (mode) / 256; return cost; } @@ -23090,7 +23093,9 @@ ix86_reassociation_width (unsigned int op, machine_mode mode) return 1; /* Account for targets that splits wide vectors into multiple parts. */ - if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128) + if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256) + div = GET_MODE_BITSIZE (mode) / 256; + else if (TARGET_AVX256_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 128) div = GET_MODE_BITSIZE (mode) / 128; else if (TARGET_SSE_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 64) div = GET_MODE_BITSIZE (mode) / 64; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index d865fcb9466..e6a603ed31a 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -419,6 +419,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL] #define TARGET_AVX256_SPLIT_REGS \ ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS] +#define TARGET_AVX512_SPLIT_REGS \ + ix86_tune_features[X86_TUNE_AVX512_SPLIT_REGS] #define TARGET_GENERAL_REGS_SSE_SPILL \ ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL] #define TARGET_AVOID_MEM_OPND_FOR_CMOVE \ diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index db85de20bae..fae3b650434 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -481,12 +481,12 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", /* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2 elements. */ DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", - ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ALDERLAKE | m_CORE_ATOM | m_GENERIC)) + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_CORE_ATOM | m_GENERIC)) /* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4 elements. */ DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", - ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ALDERLAKE | m_CORE_ATOM | m_GENERIC)) + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_CORE_ATOM | m_GENERIC)) /* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more elements. */ @@ -499,9 +499,13 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER) /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or smaller FMA chain. */ -DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 +DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE | m_SAPPHIRERAPIDS | m_CORE_ATOM) +/* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or + smaller FMA chain. */ +DEF_TUNE (X86_TUNE_AVOID_512FMA_CHAINS, "avoid_fma512_chains", m_ZNVER4) + /* X86_TUNE_V2DF_REDUCTION_PREFER_PHADDPD: Prefer haddpd for v2df vector reduction. */ DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD, @@ -532,27 +536,30 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2 /* X86_TUNE_AVX256_OPTIMAL: Use 256-bit AVX instructions instead of 512-bit AVX instructions in the auto-vectorizer. */ -DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512) +DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512 | m_ZNVER4) + +/* X86_TUNE_AVX256_SPLIT_REGS: if true, AVX512 ops are split into two AVX256 ops. */ +DEF_TUNE (X86_TUNE_AVX512_SPLIT_REGS, "avx512_split_regs", m_ZNVER4) /* X86_TUNE_AVX256_MOVE_BY_PIECES: Optimize move_by_pieces with 256-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces", - m_ALDERLAKE | m_CORE_AVX2) + m_ALDERLAKE | m_CORE_AVX2 | m_ZNVER1 | m_ZNVER2 | m_ZNVER3) /* X86_TUNE_AVX256_STORE_BY_PIECES: Optimize store_by_pieces with 256-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", - m_ALDERLAKE | m_CORE_AVX2) + m_ALDERLAKE | m_CORE_AVX2 | m_ZNVER1 | m_ZNVER2 | m_ZNVER3) /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", - m_SAPPHIRERAPIDS) + m_SAPPHIRERAPIDS | m_ZNVER4) /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit AVX instructions. */ DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", - m_SAPPHIRERAPIDS) + m_SAPPHIRERAPIDS | m_ZNVER4) /*****************************************************************************/ /*****************************************************************************/